# BiocManager::install("mixOmicsTeam/mixOmics")
# remotes::install_github("ying14/yingtools2")
# remotes::install_github("mikabr/ggpirate")
# devtools::install_github("EmilHvitfeldt/paletteer")
library(pacman)
p_load(
factoextra, # Dimension reduction
FactoMineR, # Dimension reduction
stabs, # Stability variable selection
mboost, # Gradient boosting for model building
ggrepel, # Visualization, repels labels on plots
bestglm, # Logistic regression
knitr, # To change R markdown PDF options
caret, # To calculate model paramters
cutpointr, # Calculate cutpoints
RPostgreSQL, # Connect to PostgreSQL server
phyloseq, # Phyloseq data wrangling
microbiomeMarker, # LEfSe analysis
ComplexHeatmap, # Heatmap generator
paletteer, # Extensive color palette
tidyverse, # Data wrangling and visualization
circlize, # Color ramp builder
rstatix, # Tidyverse statistics
EnhancedVolcano, # Volcano plot analysis
umap, # Uniform Manifold Approximation Projection
ggpubr, # Simple ggplots with stats
ggpirate, # ggplot version of Pirate plot
mixOmics, # PLS-DA
conflicted, # Forces conflicted packages to be used by preferred package
gridExtra, # Arrange multiple grobs
reticulate, # Run python in R
tableone, # Additional clinical tables support
lubridate, # Manipulate date objects
yingtools2, # Custom plotting functions
cowplot, # Combine plots together
epiR, # Obtain model performance measures
pROC, # Produce ROC curves
plotrix, # Add table to base R plot
glmnet, # LASSO regression
forcats, # Factor reordering
ggpmisc, # Miscellaneous ggplot helper functions
doParallel, # Parallelize functions (for optimizing cutpoints)
PRISMAstatement, # Flow table generator
DiagrammeRsvg, # Flow table visualizer
rsvg, # Flow table visualizer
scales, # Scale functions for visualizations
devtools, # To load packages from GitHub
install = F
)
library("gtsummary") # Clinical tables, pacman had trouble loading this in, so had to go the manual route
conflict_prefer("select", "dplyr")
conflict_prefer("mutate", "dplyr")
conflict_prefer("filter", "dplyr")
conflict_prefer("rename", "dplyr")
conflict_prefer("slice", "dplyr")
conflict_prefer("between", "dplyr")
conflict_prefer("annotate", "ggplot2")
devtools::source_url("https://github.com/yingeddi2008/DFIutility/blob/master/getRdpPal.R?raw=TRUE")
# ggplot theme shortcuts
et <- element_text
eb <- element_blank
er <- element_rect
el <- element_line
opts_chunk$set(tidy.opts = list(width.cutoff = 60), tidy = TRUE)
`%!in%` <- negate(`%in%`)
# Running python in R
## User must set path to their python library
reticulate::use_python("/usr/bin/python3")
# Customized lefse function
source("./Data/run_lefse2.R")# 1) Load R image load('./Data/LT_Modeling.RData')
# OR #
# 2) Individually Load R Objects
# Sample lookup
sample_lookup <- readRDS("./Data/sample_lookup.rds")
# First samples from all patients
first_samps <- readRDS("./Data/first_samps_anon.rds")
# Simplified dataframe containing infection information and
# relative abundance of targeted taxa
peri_matrix_all <- readRDS("./Data/peri_matrix_clin_all.rds")
# Distinct detailed infection data
peri_criteria_best <- readRDS("./Data/peri_criteria_best_anon.rds")
# All detailed infection data
peri_criteria_all <- readRDS("./Data/peri_criteria_all_anon.rds")
# NCBI taxonomy lookup
tax_lookup <- readRDS("./Data/tax_lookup.rds")
# Complete metaphlan dataframe for all patients and healthy
# donors
metaphlan_df <- readRDS("./Data/metaphlan_anon.rds")
# Metaphlan dataframe of patient samples
metaphlan_peri_anon <- readRDS("./Data/metaphlan_peri_anon.rds")
# Custom color palette
metaphlan_pal <- getRdpPal(metaphlan_df)
# Qualitative metabolomics
metab_qual_anon <- readRDS("./Data/metab_qual_anon.rds")
# Quantitative metabolomics
metab_quant_anon <- readRDS("./Data/metab_quant_anon.rds")
# Antibiotics data
abx <- readRDS("./Data/abx_anon.rds")
# Demographics data
demo <- readRDS("./Data/demo_anon.rds")# Custom function to avoid any errors during map
safe_cutpointr <- possibly(.f = cutpointr, otherwise = "Error")
# Calculate the number of cores
no_cores <- detectCores() - 2
# create the cluster for caret to use
cl <- makePSOCKcluster(no_cores)
registerDoParallel(cl)
set.seed(123456)
test_abundance <- peri_matrix_all %>%
mutate(sampleID = as.factor(sampleID)) %>%
select(starts_with(c("entero", "esch", "klebs", "citro",
"rahn", "proteus")), -ends_with("abs_abundance")) %>%
mutate_if(is.character, as.numeric) %>%
mutate(enterococcus_infection = ifelse(`Enterococcus faecium` +
`Enterococcus faecalis` + `Enterococcus avium` >= 1,
1, 0), enterobacterales_infection = ifelse(`Escherichia coli` +
`Klebsiella pneumoniae` + `Citrobacter freundii` + `Proteus mirabilis` >=
1, 1, 0)) %>%
select(-c(`Enterococcus faecium`, `Enterococcus faecalis`,
`Enterococcus avium`, `Escherichia coli`, `Klebsiella pneumoniae`,
`Citrobacter freundii`, `Proteus mirabilis`)) %>%
pivot_longer(-c(enterococcus_infection, enterobacterales_infection),
names_to = "variable", values_to = "value") %>%
pivot_longer(-c(variable, value), names_to = "infection_type",
values_to = "infection") %>%
mutate(variable = paste0("Input: ", variable, "\nPredict: ",
infection_type)) %>%
group_by(infection_type, variable) %>%
group_map(~safe_cutpointr(., value, infection, variable,
method = maximize_metric, metric = youden, pos_class = 1,
boot_runs = 100, allowParallel = TRUE, na.rm = T), .keep = TRUE)
# to get cutpoint object
test_abundance[4][[1]] # ecoc rel abd, ecoc infection## # A tibble: 1 × 18
## subgroup direction
## <chr> <chr>
## 1 "Input: enterococcus_rel_abundance\nPredict: enterococcus_infection" >=
## optimal_cutpoint method youden acc sensitivity specificity
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 0.199356 maximize_metric 0.577090 0.723214 0.882353 0.694737
## AUC pos_class neg_class prevalence outcome predictor grouping
## <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr>
## 1 0.829721 1 0 0.151786 infection value variable
## data roc_curve boot
## <list> <list> <list>
## 1 <tibble [112 × 2]> <rc_ctpnt [82 × 10]> <tibble [100 × 23]>
test_abundance[1][[1]] # ebac rel abd, ebacc infection## # A tibble: 1 × 18
## subgroup
## <chr>
## 1 "Input: enterobacterales_rel_abundance\nPredict: enterobacterales_infection"
## direction optimal_cutpoint method youden acc sensitivity
## <chr> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 >= 0.0247691 maximize_metric 0.798077 0.8125 1
## specificity AUC pos_class neg_class prevalence outcome predictor
## <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 0.798077 0.905048 1 0 0.0714286 infection value
## grouping data roc_curve boot
## <chr> <list> <list> <list>
## 1 variable <tibble [112 × 2]> <rc_ctpnt [59 × 10]> <tibble [100 × 23]>
test_abundance_unnest <- test_abundance %>%
map_df(as_tibble)
test_abundance_unnest %>%
separate(subgroup, into = c("Input", "Predict"), sep = "\n",
remove = F) %>%
filter(grepl("enterobacterales", Input) & grepl("enterobacterales",
Predict) | grepl("enterococcus", Input) & grepl("enterococcus",
Predict)) %>%
group_by(pos_class) %>%
ungroup() %>%
unnest(roc_curve) %>%
arrange(desc(AUC)) %>%
select(-boot) %>%
mutate(auc_label = paste0("AUC = ", formatC(round(AUC, 3) *
100, digits = 0, format = "f"), "%"), auc_label = case_when(grepl("enterococcus_rel_abundance",
Input) ~ paste0(auc_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
AUC, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0, format = "f"),
"%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
AUC, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0, format = "f"),
"%"), "]"), grepl("enterobacterales_rel_abundance", Input) ~
paste0(auc_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
AUC, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0,
format = "f"), "%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
AUC, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0,
format = "f"), "%"), "]")), acc_label = paste0("Accuracy = ",
formatC(round(acc, 3) * 100, digits = 0, format = "f"),
"%"), acc_label = case_when(grepl("enterococcus_rel_abundance",
Input) ~ paste0(acc_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
acc, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0, format = "f"),
"%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
acc, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0, format = "f"),
"%"), "]"), grepl("enterobacterales_rel_abundance", Input) ~
paste0(acc_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
acc, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0,
format = "f"), "%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
acc, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0,
format = "f"), "%"), "]")), sens_label = paste0("Sensitivity = ",
formatC(round(sensitivity, 3) * 100, digits = 0, format = "f"),
"%"), sens_label = case_when(grepl("enterococcus_rel_abundance",
Input) ~ paste0(sens_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
sensitivity, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0,
format = "f"), "%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
sensitivity, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0,
format = "f"), "%"), "]"), grepl("enterobacterales_rel_abundance",
Input) ~ paste0(sens_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
sensitivity, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0,
format = "f"), "%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
sensitivity, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0,
format = "f"), "%"), "]")), spec_label = paste0("Specificity = ",
formatC(round(specificity, 3) * 100, digits = 0, format = "f"),
"%"), spec_label = case_when(grepl("enterococcus_rel_abundance",
Input) ~ paste0(spec_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
specificity, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0,
format = "f"), "%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
specificity, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0,
format = "f"), "%"), "]"), grepl("enterobacterales_rel_abundance",
Input) ~ paste0(spec_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
specificity, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0,
format = "f"), "%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
specificity, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0,
format = "f"), "%"), "]"))) %>%
filter(grepl(pattern = "rel_abundance", x = Input)) %>%
mutate(subgroup = str_to_title(subgroup), subgroup = gsub(pattern = "_",
replacement = " ", x = subgroup), subgroup = gsub(pattern = "rel abundance",
replacement = "Relative Abundance (%)", x = subgroup),
subgroup2 = ifelse(grepl(x = subgroup, pattern = "enterococcus",
ignore.case = TRUE), "Enterococcus", "Enterobacterales"),
subgroup2 = factor(subgroup2, levels = c("Enterococcus",
"Enterobacterales"))) %>%
ggplot(aes(x = fpr, y = tpr, color = subgroup2)) + geom_line(size = 1.2) +
geom_text(aes(label = auc_label, x = 0.5, y = 0.13), show.legend = F,
hjust = 0) + geom_text(aes(label = acc_label, x = 0.5,
y = 0.09), show.legend = F, hjust = 0) + geom_text(aes(label = spec_label,
x = 0.5, y = 0.05), show.legend = F, hjust = 0) + geom_text(aes(label = sens_label,
x = 0.5, y = 0.01), show.legend = F, hjust = 0) + geom_text(aes(label = paste0("Cutpoint = ",
round((optimal_cutpoint), digits = 3) * 100, "%"), x = 0.5,
y = 0.17), hjust = 0, show.legend = F) + theme_bw() + theme(axis.text = et(color = "black",
size = 12), axis.title = et(color = "black", size = 14),
legend.text = et(size = 12), legend.title = et(size = 14),
legend.spacing.y = unit(0.5, "cm"), legend.position = "none",
panel.grid = eb(), strip.text = et(size = 14), strip.background = eb(),
panel.border = eb(), panel.spacing.y = unit(20, "mm"), axis.line.x = el(color = "black")) +
geom_vline(xintercept = -0.05) + geom_hline(yintercept = -0.05) +
scale_x_continuous(expand = expansion(add = c(0.001, 0.05))) +
scale_y_continuous(expand = expansion(add = c(0.001, 0.05))) +
facet_wrap(~subgroup2, ncol = 1, scales = "fixed") + ylab("True Positive Rate\n") +
xlab("\nFalse Positive Rate") + scale_color_manual(values = c("#2dc46b",
"red")) + guides(color = guide_legend("Groups", byrow = T,
override.aes = list(size = 5)))ggsave(filename = "./Results/Figure_4C.pdf", height = 12, width = 7)
# obtain threshold cutpoints for relative abundance
optimal_cutpoint_rel <- test_abundance_unnest %>%
separate(subgroup, into = c("Input", "Predict"), sep = "\n",
remove = F) %>%
filter(grepl("enterobacterales", Input) & grepl("enterobacterales",
Predict) | grepl("enterococcus", Input) & grepl("enterococcus",
Predict)) %>%
group_by(pos_class) %>%
filter(grepl("rel", subgroup)) %>%
select(subgroup, optimal_cutpoint)# Heatmap compounds and their categories
heatmap_lookup <- read.csv("./Data/qual_heatmap_lookup.csv",
stringsAsFactors = FALSE)
# Build heatmap compound list
heatmap_cmpds <- metab_qual_anon %>%
mutate(compound = str_to_title(compound), compound = recode(compound,
Preq1 = "PreQ1")) %>%
filter(compound %in% heatmap_lookup$compound) %>%
distinct(compound) %>%
drop_na()
qual_log2fc_ecoc_expan <- metab_qual_anon %>%
mutate(compound = ifelse(compound == "isovaleric-acid", "isovalerate",
compound), compound = str_to_title(compound), compound = recode(compound,
Preq1 = "PreQ1")) %>%
filter(compound %in% heatmap_cmpds$compound) %>%
left_join(peri_matrix_all %>%
select(sampleID, enterococcus_rel_abundance)) %>%
drop_na() %>%
mutate(enterococcus_expansion = ifelse(enterococcus_rel_abundance >=
optimal_cutpoint_rel$optimal_cutpoint[2], 1, 0)) %>%
arrange(enterococcus_expansion, sampleID) %>%
group_by(compound) %>%
mutate(enterococcus_expansion_0 = length(mvalue[enterococcus_expansion ==
"0"]), enterococcus_expansion_1 = length(mvalue[enterococcus_expansion ==
"1"])) %>%
filter(any(mvalue != 0)) %>%
summarise(log2fc_val = log((mean(mvalue[enterococcus_expansion ==
"0"], na.rm = T)/mean(mvalue[enterococcus_expansion ==
"1"], na.rm = T)), base = 2)) # 0 = No Expansion, 1 = Expansion
qual_pval_ecoc_expan <- metab_qual_anon %>%
mutate(compound = ifelse(compound == "isovaleric-acid", "isovalerate",
compound), compound = str_to_title(compound), compound = recode(compound,
Preq1 = "PreQ1")) %>%
filter(compound %in% heatmap_cmpds$compound) %>%
left_join(peri_matrix_all %>%
select(sampleID, enterococcus_rel_abundance)) %>%
drop_na() %>%
mutate(enterococcus_expansion = ifelse(enterococcus_rel_abundance >=
optimal_cutpoint_rel$optimal_cutpoint[2], 1, 0)) %>%
arrange(enterococcus_expansion, sampleID) %>%
group_by(compound) %>%
mutate(enterococcus_expansion_0 = length(mvalue[enterococcus_expansion ==
"0"]), enterococcus_expansion_1 = length(mvalue[enterococcus_expansion ==
"1"])) %>%
filter(any(mvalue != 0)) %>%
rstatix::wilcox_test(mvalue ~ enterococcus_expansion) %>%
rstatix::adjust_pvalue(method = "BH") %>%
rstatix::add_significance("p.adj")
qual_tot_ecoc_expan <- left_join(qual_log2fc_ecoc_expan, qual_pval_ecoc_expan) %>%
column_to_rownames(var = "compound")
# volcano label color
ecoc_expan_volcano_labcol <- qual_tot_ecoc_expan %>%
filter(p.adj <= 0.05 & abs(log2fc_val) >= 0.75) %>%
mutate(color = ifelse(log2fc_val < 0, "#389458", "gray47"))
# Volcano Plot (adjusted)
set.seed(123456)
volcano_adj_ecoc_expan <- EnhancedVolcano(qual_tot_ecoc_expan,
lab = rownames(qual_tot_ecoc_expan), x = "log2fc_val", y = "p.adj",
title = NULL, pCutoff = 0.05, FCcutoff = 0.75, pointSize = 4,
labSize = 4, labCol = ecoc_expan_volcano_labcol$color, caption = NULL,
colAlpha = 0.65, col = c("gray75", c("#D4CA15", "#912777",
"#1238E3")), xlim = c(-2.5, 4), ylim = c(0, 8), legendPosition = "right",
legendLabels = c(expression(p.adj > 0.05 * ";" ~ Log[2] ~
FC < "±" * 0.75), expression(p.adj > 0.05 * ";" ~ Log[2] ~
FC >= "±" * 0.75), expression(p.adj <= 0.05 * ";" ~
Log[2] ~ FC < "±" * 0.75), expression(p.adj <= 0.05 *
";" ~ Log[2] ~ FC >= "±" * 0.75)), legendLabSize = 14,
boxedLabels = T, drawConnectors = T, widthConnectors = 0.2,
arrowheads = F, gridlines.minor = F, gridlines.major = F,
max.overlaps = Inf) + theme(axis.text = et(color = "black"),
legend.text = et(hjust = 0), plot.margin = unit(c(0, 4, 2,
4), "cm")) + labs(subtitle = NULL) + annotate("segment",
x = 0.8, xend = 3, y = 7.95, yend = 7.95, arrow = arrow(),
size = 2, color = "gray67") + annotate("text", x = 1.65,
y = 8.15, label = "No Expansion", size = 6, color = "gray67") +
annotate("rect", xmin = 0.75, xmax = Inf, ymin = -log(0.05,
base = 10), ymax = Inf, alpha = 0.1, fill = "gray87") +
annotate("segment", x = -0.8, xend = -2.5, y = 7.95, yend = 7.95,
arrow = arrow(), size = 2, color = "#389458") + annotate("text",
x = -1.55, y = 8.15, label = "Expansion", size = 6, color = "#389458") +
annotate("rect", xmin = -0.75, xmax = -Inf, ymin = -log(0.05,
base = 10), ymax = Inf, alpha = 0.1, fill = "#389458") +
guides(color = guide_legend(nrow = 4), shape = guide_legend(nrow = 4)) +
ggtitle("Enterococcus Expansion") + scale_y_continuous(expand = expansion(add = c(0,
0.15)))
volcano_adj_ecoc_expanggsave(plot = volcano_adj_ecoc_expan, filename = "./Results/Figure_5A.pdf",
width = 24, height = 8)# Using unadjusted p-values for down-selection of
# metabolites, show distribution of normalized peak area
boxplot_ecoc_expan <- metab_qual_anon %>%
mutate(compound = ifelse(compound == "isovaleric-acid", "isovalerate",
compound), compound = str_to_title(compound), compound = recode(compound,
Preq1 = "PreQ1")) %>%
filter(compound %in% heatmap_cmpds$compound) %>%
left_join(peri_matrix_all %>%
select(sampleID, enterococcus_rel_abundance)) %>%
drop_na() %>%
mutate(enterococcus_expansion = ifelse(enterococcus_rel_abundance >=
optimal_cutpoint_rel$optimal_cutpoint[2], 1, 0), enterococcus_expansion = ifelse(enterococcus_expansion ==
1, "Expansion", "No Expansion")) %>%
arrange(enterococcus_expansion, sampleID) %>%
group_by(compound) %>%
mutate(enterococcus_expansion_0 = length(mvalue[enterococcus_expansion ==
"No Expansion"]), enterococcus_expansion_1 = length(mvalue[enterococcus_expansion ==
"Expansion"])) %>%
filter(any(mvalue != 0)) %>%
right_join(qual_tot_ecoc_expan %>%
rownames_to_column(var = "compound") %>%
mutate(abs_log2fc_val = abs(log2fc_val)) %>%
filter(p <= 0.05, abs_log2fc_val >= 1))
if (nrow(boxplot_ecoc_expan) > 0) {
print(ggpar(ggboxplot(boxplot_ecoc_expan, x = "enterococcus_expansion",
y = "mvalue", color = "enterococcus_expansion", palette = c("#389458",
"gray87"), ylab = "Normalized Peak Area", xlab = "",
outlier.shape = NA), legend.title = "Enterococcus") +
stat_compare_means(label.y.npc = 0.75) + facet_wrap(~compound,
scales = "free_y") + geom_point(data = boxplot_ecoc_expan,
aes(x = enterococcus_expansion, y = mvalue, color = enterococcus_expansion),
position = position_jitter(width = 0.2), size = 2, alpha = 0.65))
ggsave(filename = "./Results/enterococcus_expansion_boxplot.pdf",
width = 24, height = 18)
} else {
print("no significant observations")
}# Custom colors
pirate_colors <- c("#1A49BE", "#3A001E")
pirate_colors2 <- c("#3A001E", "#3A001E", "#3A001E", "#1A49BE")
t_metaphlan <- metaphlan_df %>%
filter(sampleID %in% first_samps$sampleID | grepl(sampleID,
pattern = "ht")) %>%
mutate(db = ifelse(grepl(sampleID, pattern = "lt"), "Liver Transplant",
"Healthy Donor")) %>%
select(sampleID, taxid, db, pctseqs, Total) %>%
group_by(sampleID, taxid, pctseqs) %>%
slice(1) %>%
ungroup() %>%
filter(pctseqs >= 1e-04) %>%
group_by(sampleID) %>%
dplyr::add_count(taxid, name = "totalSp") %>%
mutate(sampleID_count = length(unique(sampleID)), spPres = totalSp/sampleID_count) %>%
filter(spPres >= 0.1) %>%
select(-c(Total, sampleID_count, spPres, totalSp)) %>%
group_by(sampleID) %>%
mutate(pctseqs = pctseqs/sum(pctseqs))
t_metaphlan_mat <- t_metaphlan %>%
distinct() %>%
pivot_wider(names_from = "taxid", values_from = "pctseqs",
values_fill = 0) %>%
column_to_rownames(var = "sampleID") %>%
select(-db)
# taxUMAP microbiota table
taxumap_microbiota <- t_metaphlan_mat %>%
rownames_to_column(var = "index_column") %>%
pivot_longer(-index_column, names_to = "taxid", values_to = "pctseq") %>%
mutate(taxid = paste0("taxID", taxid)) %>%
pivot_wider(names_from = "taxid", values_from = "pctseq")
write.csv(taxumap_microbiota, "./Results/microbiota_table.csv",
row.names = FALSE)
# taxUMAP taxonomy table
taxumap_taxonomy <- t_metaphlan_mat %>%
rownames_to_column(var = "index_column") %>%
pivot_longer(-index_column, names_to = "taxid", values_to = "pctseq") %>%
left_join(tax_lookup %>%
mutate(taxid = as.character(taxid))) %>%
mutate(taxid = paste0("taxID", taxid)) %>%
distinct(Kingdom, Phylum, Class, Order, Family, Genus, Species,
taxid) %>%
transmute(OTU = taxid, Kingdom, Phylum = if_else(Phylum ==
"", "unclassified", Phylum), Class = if_else(Class ==
"", "unclassified", Class), Order = if_else(Order ==
"", "unclassified", Order), Family = if_else(Family ==
"", "unclassified", Family), Genus = if_else(Genus ==
"", "unclassified", Genus), Genus = gsub(" ", "_", Genus),
Species = if_else(Species == "", "unclassified", Species),
Species = gsub(" ", "_", Species))
write.csv(taxumap_taxonomy, "./Results/taxonomy.csv", row.names = FALSE)import sys
print(sys.path)
# USER MUST SET THEIR PATH TO THEIR LOCALLY INSTALLED TAXUMAP LOCATION## ['', '/usr/bin', '/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python38.zip', '/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8', '/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/lib-dynload', '/Users/nick/Library/Python/3.8/lib/python/site-packages', '/opt/anaconda3/bin/taxumap', '/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/site-packages', '/Library/Frameworks/R.framework/Versions/4.2/Resources/library/reticulate/python']
sys.path.insert(0, '/Users/nick/Documents/taxumap/taxumap/')
from taxumap.taxumap_base import Taxumap
# From file
tu = Taxumap(taxonomy='./Results/taxonomy.csv', microbiota_data='./Results/microbiota_table.csv',random_state=456)
# Transform the data (an inplace function)## Phylum Class
## not monophyletic
## Class Order
## not monophyletic
## Order Family
## not monophyletic
## Family Genus
## not monophyletic
## post validate inputs main Kingdom ... Species
## ASV ...
## taxID817 Bacteria ... Bacteroides_fragilis
## taxID818 Bacteria ... Bacteroides_thetaiotaomicron
## taxID820 Bacteria ... Bacteroides_uniformis
## taxID821 Bacteria ... Phocaeicola_vulgatus
## taxID823 Bacteria ... Parabacteroides_distasonis
## ... ... ... ...
## taxID28447 Bacteria ... Clavibacter_michiganensis
## taxID33968 Bacteria ... Leuconostoc_pseudomesenteroides
## taxID709323 Bacteria ... Fructobacillus_tropaeoli
## taxID1070421 Bacteria ... Periweissella_fabalis
## taxID2749962 Bacteria ... Lactococcus_paracarnosus
##
## [672 rows x 7 columns]
##
## /opt/anaconda3/bin/taxumap/taxumap/dataloading.py:86: UserWarning: Reading taxonomy table. Assuming columns are ordered by phylogeny with in descending order of hierarchy:
## e.g. Kingdom, Phylum, ... , Genus, Species, etc.
## Additionally, the OTU or ASV column must be labeled as 'OTU' or 'ASV' unless otherwise specified
## warnings.warn(
tu.transform_self(neigh=55)
# Raw embedding dataframe## Taxumap(agg_levels = ['Phylum', 'Family'], weights = [1, 1])
##
## /opt/anaconda3/bin/taxumap/taxumap/taxumap_base.py:91: UserWarning: Setting min_dist to 0.05/sum(weights)
## warnings.warn("Setting min_dist to 0.05/sum(weights)")
## /opt/anaconda3/bin/taxumap/taxumap/taxumap_base.py:102: UserWarning: Setting epochs to 5000
## warnings.warn("Setting epochs to %d" % epochs)
## /opt/anaconda3/bin/taxumap/taxumap/tools.py:47: UserWarning: aggregating on Phylum
## warnings.warn("aggregating on %s" % agg_level)
## /opt/anaconda3/bin/taxumap/taxumap/tools.py:47: UserWarning: aggregating on Family
## warnings.warn("aggregating on %s" % agg_level)
embedding_df = tu.df_embedding#### Alpha Diversity ####
# Alpha diversity matrix: Inverse Simpson
alpha_invsim <- vegan::diversity(t_metaphlan_mat, index = "invsimpson") %>%
as.data.frame() %>%
rownames_to_column(var = "sampleID") %>%
dplyr::rename("InvSimpson" = ".")
# Alpha Diversity matrix: Shannon
alpha_shannon <- vegan::diversity(t_metaphlan_mat, index = "shannon") %>%
as.data.frame() %>%
rownames_to_column(var = "sampleID") %>%
dplyr::rename("Shannon" = ".")
# Alpha Diversity matrix: Observed ASVs
alpha_richness <- vegan::specnumber(t_metaphlan_mat) %>%
as.data.frame() %>%
rownames_to_column(var = "sampleID") %>%
dplyr::rename("Richness" = ".")
#### taxUMAP #####
# Find most abundant taxa per sample and plot just that
top_tax <- t_metaphlan %>%
group_by(sampleID) %>%
slice_max(pctseqs, n = 1) %>%
left_join(tax_lookup) %>%
mutate(across(everything(), ~ifelse(.=="", NA, as.character(.)))) %>%
replace_na(list(Species="unclassified",
Genus="unclassified",
Family="unclassified",
Order="unclassified",
Class="unclassified",
Phylum="unclassified")) %>%
mutate(Genus=ifelse(Genus=="unclassified",
paste(Family,Genus,sep="\n"),
as.character(Genus)),
pctseqs = as.numeric(pctseqs))
metaphlan_df2 <- t_metaphlan %>%
mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor"))) %>%
left_join(tax_lookup) %>%
drop_na(taxid) %>%
arrange(Kingdom, Phylum, Class, Order, Family, Genus) %>%
mutate(Genus = paste0(Phylum,"-",Order,"-", Family, "-",Genus)) %>%
left_join(alpha_shannon) %>%
group_by(sampleID) %>%
arrange(Genus) %>%
mutate(cum.pct = cumsum(pctseqs),
y.text = (cum.pct + c(0, cum.pct[-length(cum.pct)]))/2) %>%
ungroup() %>%
dplyr::select(-cum.pct)
metaphlan_df_sumry <-
metaphlan_df2 %>%
group_by(sampleID) %>%
dplyr::slice(1) %>%
ungroup() %>%
mutate(healthy_min_shannon = min(Shannon[db == "Healthy Donor"]),
diversity_group = case_when(
db == "Healthy Donor" ~ "Healthy Donor",
Shannon >= healthy_min_shannon ~ "High Diversity",
TRUE ~ "TBD"
),
lt_med_shannon = median(Shannon[diversity_group == "TBD"], na.rm = TRUE),
diversity_group = case_when(
diversity_group %in% c("Healthy Donor", "High Diversity") ~ diversity_group,
Shannon >= lt_med_shannon ~ "Medium Diversity",
TRUE ~ "Low Diversity"
),
diversity_group = factor(
diversity_group,
levels = c(
"Low Diversity",
"Medium Diversity",
"High Diversity",
"Healthy Donor"
)
),
diversity_group_abv = gsub(pattern = " Diversity",
replacement = "",
x = diversity_group),
diversity_group_abv = factor(diversity_group_abv,
levels = c("Low", "Medium", "High", "Healthy Donor"))) %>%
arrange(Genus)
tax_umap_mat_plot <- py$embedding_df %>%
as.data.frame() %>%
rownames_to_column(var = "sampleID") %>%
left_join(t_metaphlan %>%
distinct(sampleID, db)) %>%
left_join(top_tax) %>%
ggplot(aes(x = taxumap1, y = taxumap2, color = Genus, shape = db, size = pctseqs*100, alpha = db))+
geom_point(fill = "black")+
theme_bw() +
theme(panel.grid = eb(),
axis.title = et(color = "black", size = 14),
axis.text = et(color = "black", size = 12),
legend.title = et(color = "black", size = 14),
legend.text = et(color = "black", size = 12),
legend.position = "top"
) +
xlab("taxUMAP1") +
ylab("taxUMAP1") +
guides(
shape = guide_legend(
title = "Cohort",
override.aes = list(size = 4),
order = 1,
nrow = 2,
title.position = "top",
title.hjust = 0.5
),
size = guide_legend(
title = "Relative Abundance (%)",
order = 2,
nrow = 2,
title.position = "top",
title.hjust = 0.5
),
color = "none",
fill = "none",
alpha = "none"
)+
scale_color_manual(values = metaphlan_pal)+
scale_shape_manual(values = c(24, 16))+
scale_alpha_manual(values = c(1, 0.45))
tax_umap_mat_plotggsave(plot = tax_umap_mat_plot,
filename = "./Results/Figure_1B.pdf",
height = 8, width = 8)
#### Alpha Diversity Stats #####
# Obtain stats for alpha diversity
diversity_comps <- list(
c("Healthy Donor", "High"),
c("High", "Medium"),
c("High", "Low"),
c("Medium", "Low")
)
alpha_stats <- alpha_invsim %>%
left_join(alpha_shannon) %>%
left_join(alpha_richness) %>%
inner_join(metaphlan_df2 %>%
left_join(metaphlan_df_sumry %>% select(sampleID, db, diversity_group_abv)) %>%
distinct(sampleID, db, diversity_group_abv)
) %>%
pivot_longer(!c(sampleID, db, diversity_group_abv),names_to = "diversity_metric", values_to = "value") %>%
group_by(diversity_metric) %>%
rstatix::wilcox_test(value~diversity_group_abv,
comparisons = diversity_comps,
p.adjust.method = "BH",
alternative= "two.sided"
) %>%
ungroup()
# Create dataframe for all phylogentic levels of interest
phylo_rel_abd <- t_metaphlan %>%
left_join(tax_lookup) %>%
inner_join(metaphlan_df2 %>%
left_join(metaphlan_df_sumry %>% select(sampleID, db, diversity_group_abv)) %>%
distinct(sampleID, db, diversity_group_abv)
) %>%
mutate(Species = paste(Kingdom, Phylum, Class, Order, Family, Genus, Species, sep = "|")) %>%
filter(grepl(pattern = "Enterococcus|Enterobacterales|Bacteroidetes|Lachnospiraceae|Oscillospiraceae", x = Species)) %>%
mutate(organism = case_when(grepl(pattern = "Enterococcus", x = Species) ~ "Enterococcus",
grepl(pattern = "Enterobacterales", x = Species) ~ "Enterobacterales",
grepl(pattern = "Bacteroidetes", x = Species) ~ "Bacteroidetes",
grepl(pattern = "Lachnospiraceae", x = Species) ~ "Lachnospiraceae",
grepl(pattern = "Oscillospiraceae", x = Species) ~ "Oscillospiraceae")) %>%
select(sampleID, db, diversity_group_abv, organism, pctseqs) %>%
group_by(sampleID, db, diversity_group_abv, organism) %>%
summarise(pctseqs = sum(pctseqs)) %>%
ungroup() %>%
pivot_wider(names_from = organism, values_from = pctseqs, values_fill = 0) %>%
pivot_longer(!c(sampleID, db, diversity_group_abv), names_to = "organism", values_to = "pctseqs")
# Obtain stats for all phylogentic levels of interest
rel_abd_alpha_stats <- phylo_rel_abd %>%
group_by(organism) %>%
rstatix::wilcox_test(pctseqs~diversity_group_abv) %>%
bind_rows(alpha_stats) %>%
rstatix::adjust_pvalue(method = "BH") %>%
mutate(p.adj = ifelse(p.adj < 0.001, 0.001, round(p.adj, 3)))
write.csv(rel_abd_alpha_stats, "./Results/Figure_1_Statistics.csv", row.names = FALSE)
#### Plot Inverse Simpson ####
symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, Inf), symbols = c("****", "***", "**", "*", "ns"))
diversity_group_colors <- c("#3A001E", "#8A0246", "#C20463", "#1A49BE")
set.seed(456)
gg_alpha_invsim <- alpha_invsim %>%
inner_join(t_metaphlan %>%
distinct(sampleID, db) %>%
select(sampleID, db)
) %>%
inner_join(metaphlan_df %>%
left_join(metaphlan_df_sumry %>% select(sampleID, db, diversity_group, diversity_group_abv)) %>%
distinct(sampleID, db, diversity_group, diversity_group_abv)
) %>%
mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor"))) %>%
ggplot(., aes(x = diversity_group_abv,
y = InvSimpson,
color = diversity_group_abv,
fill = db)) +
geom_boxplot(outlier.colour = NA,
alpha = 0.35) +
geom_jitter(width = 0.2, size = 2.5, alpha = 0.65)+
stat_compare_means(comparisons = diversity_comps,
tip.length = 0.01,
step.increase = 0.075,
symnum.args = symnum.args,
method.args = list(alternative = "two.sided",
exact = FALSE)
) +
theme_bw() +
theme(
panel.grid = eb(),
axis.title.y = et(size = 14, color = "black"),
axis.title.x = eb(),
axis.text = et(size = 12, color = "black"),
plot.margin = margin(t = 5, # Top margin
r = 5, # Right margin
b = 5, # Bottom margin
l = 5), # Left margin
panel.border = eb(),
axis.line = el(color = 'black')
) +
ylab("Alpha Diversity\n(Inverse Simpson)") +
scale_fill_manual(values = rev(pirate_colors)) +
scale_color_manual(values = diversity_group_colors) +
guides(fill = guide_legend("Cohort"),
color = guide_legend("Diversity Group",
override.aes = aes(label = "")))+
scale_y_continuous(breaks = seq(0,45,5),
expand = expansion(mult = c(0.01, 0.1))) +
coord_cartesian(xlim = c(1.1,3.9))## [1] FALSE
gg_alpha_invsimpdf(file = "./Results/Figure_1C.pdf", height = 6, width = 7)
gg_alpha_invsim
invisible(dev.off())
#### Plot Shannon Diversity ####
set.seed(456)
gg_alpha_shannon <- alpha_shannon %>%
inner_join(t_metaphlan %>%
distinct(sampleID, db) %>%
select(sampleID, db)
) %>%
inner_join(metaphlan_df %>%
left_join(metaphlan_df_sumry %>% select(sampleID, db, diversity_group, diversity_group_abv)) %>%
distinct(sampleID, db, diversity_group, diversity_group_abv)
) %>%
mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor"))) %>%
ggplot(., aes(x = diversity_group_abv,
y = Shannon,
color = diversity_group_abv,
fill = db)) +
geom_boxplot(outlier.colour = NA,
alpha = 0.35) +
geom_jitter(width = 0.2, size = 2.5, alpha = 0.65)+
stat_compare_means(comparisons = diversity_comps,
tip.length = 0.01,
step.increase = 0.075,
symnum.args = symnum.args,
method.args = list(alternative = "two.sided",
exact = FALSE)
) +
theme_bw() +
theme(
panel.grid = eb(),
axis.title.y = et(size = 14, color = "black"),
axis.title.x = eb(),
axis.text = et(size = 12, color = "black"),
plot.margin = margin(t = 5, # Top margin
r = 5, # Right margin
b = 5, # Bottom margin
l = 5), # Left margin
panel.border = eb(),
axis.line = el(color = 'black')
) +
ylab("Alpha Diversity\n(Shannon)") +
scale_fill_manual(values = rev(pirate_colors)) +
scale_color_manual(values = diversity_group_colors) +
guides(fill = guide_legend("Cohort"),
color = guide_legend("Diversity Group",
override.aes = aes(label = "")))+
scale_y_continuous(breaks = seq(0,6.5,1),
expand = expansion(mult = c(0.01, 0.1))) +
coord_cartesian(xlim = c(1.1,3.9))## [1] FALSE
gg_alpha_shannonpdf(file = "./Results/Figure_1D.pdf", height = 6, width = 7)
gg_alpha_shannon
invisible(dev.off())
#### Plot Species Richness ####
set.seed(456)
gg_alpha_richness <- alpha_richness %>%
inner_join(t_metaphlan %>%
distinct(sampleID, db) %>%
select(sampleID, db)
) %>%
inner_join(metaphlan_df %>%
left_join(metaphlan_df_sumry %>% select(sampleID, db, diversity_group, diversity_group_abv)) %>%
distinct(sampleID, db, diversity_group, diversity_group_abv)
) %>%
mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor"))) %>%
ggplot(., aes(x = diversity_group_abv,
y = Richness,
color = diversity_group_abv,
fill = db)) +
geom_boxplot(outlier.colour = NA,
alpha = 0.35) +
geom_jitter(width = 0.2, size = 2.5, alpha = 0.65)+
stat_compare_means(comparisons = diversity_comps,
tip.length = 0.01,
label.y = c(145, 160, 150, 110),
symnum.args = symnum.args,
method.args = list(alternative = "two.sided",
exact = FALSE)
) +
theme_bw() +
theme(
panel.grid = eb(),
axis.title.y = et(size = 14, color = "black"),
axis.title.x = eb(),
axis.text = et(size = 12, color = "black"),
plot.margin = margin(t = 5, # Top margin
r = 5, # Right margin
b = 5, # Bottom margin
l = 5), # Left margin
panel.border = eb(),
axis.line = el(color = 'black')
) +
ylab("Alpha Diversity\n(Richness)") +
scale_fill_manual(values = rev(pirate_colors)) +
scale_color_manual(values = diversity_group_colors) +
guides(fill = guide_legend("Cohort"),
color = guide_legend("Diversity Group",
override.aes = aes(label = "")))+
scale_y_continuous(breaks = seq(0,180,50),
expand = expansion(mult = c(0.01, 0.035))) +
coord_cartesian(xlim = c(1.1,3.9))## [1] FALSE
gg_alpha_richnesspdf(file = "./Results/Figure_1E.pdf", height = 6, width = 7)
gg_alpha_richness
invisible(dev.off())
#### Plot Enterococcus ####
set.seed(456)
gg_ecoc_rel_abd <- phylo_rel_abd %>%
filter(organism == "Enterococcus") %>%
group_by(organism) %>%
mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor"))) %>%
ggplot(., aes(x = diversity_group_abv,
y = pctseqs,
color = diversity_group_abv,
fill = db)) +
geom_boxplot(outlier.colour = NA,
alpha = 0.35) +
geom_jitter(width = 0.2, size = 2.5, alpha = 0.65)+
stat_compare_means(comparisons = diversity_comps,
tip.length = 0.01,
symnum.args = symnum.args,
method.args = list(alternative = "two.sided",
exact = FALSE),
label.y = c(0.20, 0.75, 0.985, 1.05)
) +
theme_bw() +
theme(
panel.grid = eb(),
axis.title.y = et(size = 14, color = "black"),
axis.title.x = eb(),
axis.text = et(size = 12, color = "black"),
plot.margin = margin(t = 5, # Top margin
r = 5, # Right margin
b = 5, # Bottom margin
l = 5), # Left margin
panel.border = eb(),
axis.line = el(color = 'black')
) +
ylab(~atop(paste(italic("Enterococcus")), paste("MetaPhlAn4 Relative Abundance"))) +
scale_fill_manual(values = rev(pirate_colors)) +
scale_color_manual(values = diversity_group_colors) +
guides(fill = guide_legend("Cohort"),
color = guide_legend("Diversity Group",
override.aes = aes(label = "")))+
scale_y_continuous(breaks = seq(0,1,0.1),
expand = expansion(mult = c(0.01, 0.035)),
labels = scales::percent_format(accuracy = 1)) +
coord_cartesian(xlim = c(1.1,3.9))## [1] FALSE
gg_ecoc_rel_abdpdf(file = "./Results/Figure_1F.pdf", height = 6, width = 7)
gg_ecoc_rel_abd
invisible(dev.off())
#### Plot Enterobacterales #####
set.seed(456)
gg_ebac_rel_abd <- phylo_rel_abd %>%
filter(organism == "Enterobacterales") %>%
group_by(organism) %>%
mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor"))) %>%
ggplot(., aes(x = diversity_group_abv,
y = pctseqs,
color = diversity_group_abv,
fill = db)) +
geom_boxplot(outlier.colour = NA,
alpha = 0.35) +
geom_jitter(width = 0.2, size = 2.5, alpha = 0.65)+
stat_compare_means(comparisons = diversity_comps,
tip.length = 0.01,
symnum.args = symnum.args,
method.args = list(alternative = "two.sided",
exact = FALSE),
label.y = c(0.20, 0.75, 0.985, 1.05)
) +
theme_bw() +
theme(
panel.grid = eb(),
axis.title.y = et(size = 14, color = "black"),
axis.title.x = eb(),
axis.text = et(size = 12, color = "black"),
plot.margin = margin(t = 5, # Top margin
r = 5, # Right margin
b = 5, # Bottom margin
l = 5), # Left margin
panel.border = eb(),
axis.line = el(color = 'black')
) +
ylab(~atop(paste(italic("Enterobacterales")), paste("MetaPhlAn4 Relative Abundance"))) +
scale_fill_manual(values = rev(pirate_colors)) +
scale_color_manual(values = diversity_group_colors) +
guides(fill = guide_legend("Cohort"),
color = guide_legend("Diversity Group",
override.aes = aes(label = "")))+
scale_y_continuous(breaks = seq(0,1,0.1),
expand = expansion(mult = c(0.01, 0.035)),
labels = scales::percent_format(accuracy = 1)) +
coord_cartesian(xlim = c(1.1,3.9))## [1] FALSE
gg_ebac_rel_abdpdf(file = "./Results/Figure_1G.pdf", height = 6, width = 7)
gg_ebac_rel_abd
invisible(dev.off())
#### Plot Bacteroidetes ####
set.seed(456)
gg_bact_rel_abd <- phylo_rel_abd %>%
filter(organism == "Bacteroidetes") %>%
group_by(organism) %>%
mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor"))) %>%
ggplot(., aes(x = diversity_group_abv,
y = pctseqs,
color = diversity_group_abv,
fill = db)) +
geom_boxplot(outlier.colour = NA,
alpha = 0.35) +
geom_jitter(width = 0.2, size = 2.5, alpha = 0.65)+
stat_compare_means(comparisons = diversity_comps,
tip.length = 0.01,
symnum.args = symnum.args,
method.args = list(alternative = "two.sided",
exact = FALSE),
label.y = c(0.65, 0.98, 0.92, 1.02)
) +
theme_bw() +
theme(
panel.grid = eb(),
axis.title.y = et(size = 14, color = "black"),
axis.title.x = eb(),
axis.text = et(size = 12, color = "black"),
plot.margin = margin(t = 5, # Top margin
r = 5, # Right margin
b = 5, # Bottom margin
l = 5), # Left margin
panel.border = eb(),
axis.line = el(color = 'black')
) +
ylab(~atop(paste(italic("Bacteroidetes")), paste("MetaPhlAn4 Relative Abundance"))) +
scale_fill_manual(values = rev(pirate_colors)) +
scale_color_manual(values = diversity_group_colors) +
guides(fill = guide_legend("Cohort"),
color = guide_legend("Diversity Group",
override.aes = aes(label = "")))+
scale_y_continuous(breaks = seq(0,1,0.1),
expand = expansion(mult = c(0.01, 0.035)),
labels = scales::percent_format(accuracy = 1)) +
coord_cartesian(xlim = c(1.1,3.9))## [1] FALSE
gg_bact_rel_abdpdf(file = "./Results/Figure_1H.pdf", height = 6, width = 7)
gg_bact_rel_abd
invisible(dev.off())
#### Plot Lachnospiraceae ####
set.seed(456)
gg_lach_rel_abd <- phylo_rel_abd %>%
filter(organism == "Lachnospiraceae") %>%
group_by(organism) %>%
mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor"))) %>%
ggplot(., aes(x = diversity_group_abv,
y = pctseqs,
color = diversity_group_abv,
fill = db)) +
geom_boxplot(outlier.colour = NA,
alpha = 0.35) +
geom_jitter(width = 0.2, size = 2.5, alpha = 0.65)+
stat_compare_means(comparisons = diversity_comps,
tip.length = 0.01,
symnum.args = symnum.args,
method.args = list(alternative = "two.sided",
exact = FALSE),
label.y = c(0.65, 0.71, 0.76, 0.82)
) +
theme_bw() +
theme(
panel.grid = eb(),
axis.title.y = et(size = 14, color = "black"),
axis.title.x = eb(),
axis.text = et(size = 12, color = "black"),
plot.margin = margin(t = 5, # Top margin
r = 5, # Right margin
b = 5, # Bottom margin
l = 5), # Left margin
panel.border = eb(),
axis.line = el(color = 'black')
) +
ylab(~atop(paste(italic("Lachnospiraceae")), paste("MetaPhlAn4 Relative Abundance"))) +
scale_fill_manual(values = rev(pirate_colors)) +
scale_color_manual(values = diversity_group_colors) +
guides(fill = guide_legend("Cohort"),
color = guide_legend("Diversity Group",
override.aes = aes(label = "")))+
scale_y_continuous(breaks = seq(0,1,0.1),
expand = expansion(mult = c(0.01, 0.035)),
labels = scales::percent_format(accuracy = 1)) +
coord_cartesian(xlim = c(1.1,3.9))## [1] FALSE
gg_lach_rel_abdpdf(file = "./Results/Figure_1I.pdf", height = 6, width = 7)
gg_lach_rel_abd
invisible(dev.off())
#### Plot Oscillospiraceae ####
set.seed(456)
gg_oscl_rel_abd <- phylo_rel_abd %>%
filter(organism == "Oscillospiraceae") %>%
group_by(organism) %>%
mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor"))) %>%
ggplot(., aes(x = diversity_group_abv,
y = pctseqs,
color = diversity_group_abv,
fill = db)) +
geom_boxplot(outlier.colour = NA,
alpha = 0.35) +
geom_jitter(width = 0.2, size = 2.5, alpha = 0.65)+
stat_compare_means(comparisons = diversity_comps,
tip.length = 0.01,
symnum.args = symnum.args,
method.args = list(alternative = "two.sided",
exact = FALSE),
label.y = c(0.33, 0.52, 0.59, 0.66)
) +
theme_bw() +
theme(
panel.grid = eb(),
axis.title.y = et(size = 14, color = "black"),
axis.title.x = eb(),
axis.text = et(size = 12, color = "black"),
plot.margin = margin(t = 5, # Top margin
r = 5, # Right margin
b = 5, # Bottom margin
l = 5), # Left margin
panel.border = eb(),
axis.line = el(color = 'black')
) +
ylab(~atop(paste(italic("Oscillospiraceae")), paste("MetaPhlAn4 Relative Abundance"))) +
scale_fill_manual(values = rev(pirate_colors)) +
scale_color_manual(values = diversity_group_colors) +
guides(fill = guide_legend("Cohort"),
color = guide_legend("Diversity Group",
override.aes = aes(label = "")))+
scale_y_continuous(breaks = seq(0,1,0.1),
expand = expansion(mult = c(0.01, 0.035)),
labels = scales::percent_format(accuracy = 1)) +
coord_cartesian(xlim = c(1.1,3.9))## [1] FALSE
gg_oscl_rel_abdpdf(file = "./Results/Figure_1J.pdf", height = 6, width = 7)
gg_oscl_rel_abd
invisible(dev.off())
#### Plot Metaphlan Relative Abundance ####
# Figure 1A-MetaPhlAn4 Taxonomy
# Load legend
tax_legend <- magick::image_read_pdf("./Data/legend.v2.pdf")
gg_tax_legend <- cowplot::ggdraw() + cowplot::draw_image(tax_legend)
metaphlan_pal2 <- getRdpPal(metaphlan_df2)
gg_metaphlan <- metaphlan_df2 %>%
left_join(metaphlan_df_sumry %>% select(sampleID, diversity_group)) %>%
ungroup() %>%
mutate(Genus = factor(Genus, levels = unique(Genus))) %>%
group_by(sampleID) %>%
arrange(Genus) %>%
ggplot(aes(x=reorder(sampleID, Shannon),y=pctseqs)) +
geom_bar(stat="identity",aes(fill=Genus), width = 0.9) +
scale_fill_manual(values = metaphlan_pal2) +
theme_bw() +
theme(legend.position = "none",
axis.text.x=eb(),
axis.ticks.x=eb(),
strip.text.x= et(angle=0,size=14),
strip.background = eb(),
axis.title.y = et(color = "black", size = 14),
axis.text.y = et(color = "black", size = 12),
panel.spacing = unit(0.5, "lines"),
plot.margin = margin(t = 5,
r = 5,
b = 0,
l = 5)) +
facet_grid(. ~diversity_group, scales = "free", space = "free")+
scale_y_continuous(expand = expansion(mult = c(0.005,0.005)),
labels = scales::percent_format(accuracy = 1)) +
ylab("MetaPhlAn4 Relative Abundance") +
xlab("")
# Color facets
gg_metaphlan_grob <- ggplot_gtable(ggplot_build(gg_metaphlan))
strip_both <- which(grepl('strip-', gg_metaphlan_grob$layout$name))
fills <- diversity_group_colors
k <- 1
for (i in strip_both) {
l <- which(grepl('titleGrob', gg_metaphlan_grob$grobs[[i]]$grobs[[1]]$childrenOrder))
gg_metaphlan_grob$grobs[[i]]$grobs[[1]]$children[[l]]$children[[1]]$gp$col <- fills[k]
k <- k+1
}
gg_shannon <- metaphlan_df_sumry %>%
ggplot(aes(x=reorder(sampleID, Shannon), y = Shannon)) +
geom_bar(stat="identity",aes(fill=diversity_group_abv), width = 0.9) +
theme_bw() +
theme(legend.position = "none",
axis.text.x = eb(),
axis.title.x = eb(),
axis.ticks.x = eb(),
strip.text = eb(),
strip.background = er(fill = "white"),
axis.title.y = et(color = "black", size = 14),
axis.text.y = et(color = "black", size = 12),
panel.spacing = unit(0.5, "lines"),
plot.margin = margin(t = 0,
r = 5,
b = 0,
l = 5),
panel.grid = eb()) +
scale_fill_manual(values = diversity_group_colors) +
facet_grid(. ~diversity_group, scales = "free", space = "free")
gg_metaphlan_shannon <-
plot_grid(
gg_metaphlan_grob,
gg_shannon,
axis = "lb",
align = "hv",
nrow = 2,
rel_heights = c(1, 0.15)
)
pdf(file = "./Results/Figure_1A.pdf", width = 12.25, height = 8)
gg_metaphlan_shannon
invisible(dev.off())
#### Combine all into a single figure 1 Start ####
alpha_org_plot <- plot_grid(
gg_alpha_invsim + theme(legend.position = "none",
axis.text.x = eb(),
axis.title.x = eb(),
plot.margin = unit(c(0.1, 0, 0.15, 0), "cm")),
gg_alpha_shannon + theme(legend.position = "none",
axis.text.x = eb(),
axis.title.x = eb(),
plot.margin = unit(c(0.1, 0, 0.15, 0), "cm")),
gg_alpha_richness+ theme(legend.position = "none",
axis.text.x = eb(),
axis.title.x = eb(),
plot.margin = unit(c(0.1, 0, 0.15, 0), "cm")),
gg_lach_rel_abd + theme(axis.text.x = eb(),
axis.title.x = eb(),
plot.margin = unit(c(0.1, 0, 0.15, 0), "cm")),
gg_ecoc_rel_abd + theme(legend.position = "none",
axis.text.x = et(angle = 45, hjust = 0.85, vjust = 0.85),
plot.margin = unit(c(0.15, 0, 0, 0), "cm")),
gg_ebac_rel_abd + theme(legend.position = "none",
axis.text.x = et(angle = 45, hjust = 0.85, vjust = 0.85),
plot.margin = unit(c(0.15, 0, 0, 0), "cm")),
gg_bact_rel_abd + theme(legend.position = "none",
axis.text.x = et(angle = 45, hjust = 0.85, vjust = 0.85),
plot.margin = unit(c(0.15, 0, 0, 0), "cm")),
gg_oscl_rel_abd + theme(axis.text.x = et(angle = 45, hjust = 0.85, vjust = 0.85),
plot.margin = unit(c(0.15, 0, 0, 0), "cm")),
nrow = 2,
axis = "lb",
align = "v",
rel_widths = c(1, 1, 1, 1.3),
rel_heights = c(1,1.2)
)
# This is useful to figure out the general layout of the plots
gs <- lapply(1:4, function(ii)
grobTree(rectGrob(gp=gpar(fill=ii, alpha=0.5)), textGrob(ii)))
grid.arrange(grobs=gs, ncol=4,
top="top label", bottom="bottom\nlabel",
left="left label", right="right label")lay <- rbind(c(1,1,1,1,1,3,3,2),
c(1,1,1,1,1,3,3,2),
c(4,4,4,4,4,4,NA,NA),
c(4,4,4,4,4,4,4,4),
c(4,4,4,4,4,4,4,4))
grid.arrange(grobs = gs, layout_matrix = lay)pdf(file = "./Results/Figure_1.pdf", height = 16, width = 20)
grid.arrange(
gg_metaphlan_shannon, # 1
gg_tax_legend, # 3
tax_umap_mat_plot +
theme(
legend.text = et(size = 10),
legend.title = et(size = 12),
plot.margin = margin(
t = 5, # Top margin
r = 0, # Right margin
b = 75, # Bottom margin
l = 5 # Left margin
)
), # 2
alpha_org_plot, # 4
layout_matrix = lay
)
invisible(dev.off())
grid.arrange(
gg_metaphlan_shannon, # 1
gg_tax_legend, # 2
tax_umap_mat_plot +
theme(
legend.text = et(size = 10),
legend.title = et(size = 12),
plot.margin = margin(
t = 5, # Top margin
r = 0, # Right margin
b = 75, # Bottom margin
l = 5 # Left margin
)
), # 3
alpha_org_plot, # 4
layout_matrix = lay
)#### Combine all into a single figure 1 End ####heatmap_data <-
t_metaphlan %>%
drop_na(taxid) %>%
select(sampleID) %>%
group_by(sampleID) %>%
dplyr::slice(1) %>%
mutate(db = ifelse(grepl(sampleID, pattern = "lt"), "Liver Transplant", "Healthy Donor")) %>%
left_join(metab_qual_anon) %>%
mutate(compound = ifelse(compound == "isovaleric-acid", "isovalerate", compound),
compound = str_to_title(compound),
compound = recode(compound,
Preq1 = "PreQ1")) %>%
filter(compound %in% heatmap_cmpds$compound|is.na(compound)) %>%
group_by(compound) %>%
mutate(median_val = ifelse(median(mvalue, na.rm = TRUE) == 0, min(mvalue[mvalue > 0], na.rm = TRUE)/10, median(mvalue, na.rm = TRUE)),
heatmap_val = ifelse(log(mvalue/ median_val, base = 2) == -Inf, 0, log(mvalue/ median_val, base = 2))) %>%
ungroup() %>%
select(-c(mvalue, median_val)) %>%
group_by(sampleID, compound) %>%
slice_max(heatmap_val, with_ties = F, n = 1) %>%
ungroup() %>%
select(-db) %>%
left_join(metaphlan_df2 %>%
left_join(metaphlan_df_sumry %>% select(sampleID, db, diversity_group_abv)) %>%
distinct(sampleID, db, diversity_group_abv), by = "sampleID") %>%
group_by(sampleID, compound) %>%
slice_max(heatmap_val, with_ties = F, n = 1) %>%
left_join(alpha_shannon) %>%
group_by(db) %>%
arrange(db, Shannon) %>%
ungroup()
# Stats
heatmap_pvals <-
heatmap_data %>%
filter(diversity_group_abv != "Healthy Donor") %>%
drop_na(compound) %>%
group_by(compound) %>%
rstatix::kruskal_test(heatmap_val~diversity_group_abv) %>%
rstatix::adjust_pvalue(method = "BH") %>%
select(compound, statistic, p, p.adj)
heatmap_labels <-
heatmap_data %>%
mutate(compound = str_to_title(compound)) %>%
left_join(heatmap_lookup) %>%
left_join(heatmap_pvals %>% group_by(compound, p.adj, p) %>% slice(1)) %>%
arrange(class, subclass, p) %>%
arrange(class, subclass) %>%
pivot_wider(c(diversity_group_abv, sampleID, db, Shannon), names_from = "compound", values_from = "heatmap_val", values_fn = mean) %>%
group_by(db) %>%
arrange(db, Shannon) %>%
ungroup() %>%
distinct(sampleID, .keep_all = TRUE) %>%
select(diversity_group_abv) %>%
mutate(
diversity_group_abv = as.character(diversity_group_abv),
diversity_group_abv = ifelse(
grepl(pattern = "Healthy", as.character(diversity_group_abv)),
diversity_group_abv,
paste(diversity_group_abv, "Diversity")
),
diversity_group_abv = factor(diversity_group_abv, levels = c("Low Diversity", "Medium Diversity",
"High Diversity", "Healthy Donor"))
)
# Build heatmap compound order (row order) and compound class (row slice)
heatmap_order <-
heatmap_data %>%
ungroup() %>%
filter(compound %in% heatmap_cmpds$compound) %>%
distinct(compound) %>%
drop_na() %>%
left_join(heatmap_lookup) %>%
left_join(heatmap_pvals %>% group_by(compound, p.adj, p) %>% slice(1)) %>%
ungroup() %>%
mutate(class = factor(
class,
levels = c(
"Fatty Acid", # 1
"Dicarboxylic Acid", # 2
"Amino Acid", # 3
"Bile Acid", # 4
"Indole", # 5
"Phenolic Aromatic", # 6
"Kynurine Pathway", # 7
"Vitamin" # 8
)
),
subclass = factor(
subclass,
levels = c(
"Short-Chain Fatty Acid", # 1
"Branched-Chain Fatty Acid", # 2
"Aminated Fatty Acid", # 3
"Long-Chain Fatty Acid", # 4
"Dicarboxylic Acid", # 5
"Amino Acid", # 6
"Primary Bile Acid", # 7
"Secondary Bile Acid", # 8
"Conjugated Bile Acid", # 9
"Indole", # 10
"Phenolic Aromatic", # 11
"Kynurine Pathway", # 12
"Vitamin" # 13
)
)) %>%
arrange(class, subclass, p, compound) %>%
select(class, subclass, p, compound)
# Build heatmap patient order following same order as gg_metaphlan plot
heatmap_column_order <- heatmap_data %>%
group_by(patientID) %>%
dplyr::slice(1) %>%
left_join(alpha_shannon) %>%
group_by(db) %>%
arrange(db, Shannon) %>%
select(patientID) %>%
distinct(patientID) %>%
pull(patientID)
# P-value legend color
pvalue_col_fun = colorRamp2(c(0, 0.045), c("#75C236", "#E3E4E6"))
# Create heatmap for adjusted p-values
pvalue_adj <-
heatmap_pvals %>%
group_by(compound, p.adj, p) %>%
dplyr::slice(1) %>%
ungroup() %>%
left_join(heatmap_lookup) %>%
mutate(class = factor(
class,
levels = c(
"Fatty Acid", # 1
"Dicarboxylic Acid", # 2
"Amino Acid", # 3
"Bile Acid", # 4
"Indole", # 5
"Phenolic Aromatic", # 6
"Kynurine Pathway", # 7
"Vitamin" # 8
)
),
subclass = factor(
subclass,
levels = c(
"Short-Chain Fatty Acid", # 1
"Branched-Chain Fatty Acid", # 2
"Aminated Fatty Acid", # 3
"Long-Chain Fatty Acid", # 4
"Dicarboxylic Acid", # 5
"Amino Acid", # 6
"Primary Bile Acid", # 7
"Secondary Bile Acid", # 8
"Conjugated Bile Acid", # 9
"Indole", # 10
"Phenolic Aromatic", # 11
"Kynurine Pathway", # 12
"Vitamin" # 13
)
)) %>%
arrange(class, subclass, p.adj, compound) %>%
select(class, subclass, p.adj, compound) %>%
column_to_rownames(var = "compound") %>%
select(`Adjusted p-value` = p.adj) %>%
as.matrix() %>%
Heatmap(name = "Significance (adjusted p-value)",
cluster_rows = FALSE,
cluster_columns = FALSE,
col = pvalue_col_fun,
column_title = "KW Test",
column_title_side = "bottom",
column_title_rot = 90,
show_column_names = FALSE,
column_names_side = "top",
row_names_gp = gpar(fontsize = 8),
rect_gp = gpar(col = "black", lwd = 0.2),
row_gap = unit(3.5, "mm"),
row_names_side = c("right"),
row_order = heatmap_order$compound,
row_split = heatmap_order$subclass,
show_row_names = FALSE,
row_title_rot = 0,
heatmap_legend_param = list(at = seq(0.05, 0, -0.01)),
width = unit(0.1, "in")
)
# Heatmap legend color
col_fun <- colorRamp2(breaks = c(-5, 0, 5), colors = c("#00aaad", "white", "#ad003a"))
# Global parameter for annotation
# ht_opt$COLUMN_ANNO_PADDING <- unit(2.5, "mm")
gg_metab_heatmap <-
heatmap_data %>%
mutate(compound = str_to_title(compound),
compound = recode(compound,
Preq1 = "PreQ1")) %>%
left_join(heatmap_lookup) %>%
left_join(heatmap_pvals %>% group_by(compound, p.adj, p) %>% slice(1) %>% select(compound, p.adj, p), by = "compound") %>%
mutate(class = factor(
class,
levels = c(
"Fatty Acid", # 1
"Dicarboxylic Acid", # 2
"Amino Acid", # 3
"Bile Acid", # 4
"Indole", # 5
"Phenolic Aromatic", # 6
"Kynurine Pathway", # 7
"Vitamin" # 8
)
),
subclass = factor(
subclass,
levels = c(
"Short-Chain Fatty Acid", # 1
"Branched-Chain Fatty Acid", # 2
"Aminated Fatty Acid", # 3
"Long-Chain Fatty Acid", # 4
"Dicarboxylic Acid", # 5
"Amino Acid", # 6
"Primary Bile Acid", # 7
"Secondary Bile Acid", # 8
"Conjugated Bile Acid", # 9
"Indole", # 10
"Phenolic Aromatic", # 11
"Kynurine Pathway", # 12
"Vitamin" # 13
)
)) %>%
mutate(
diversity_group_abv = as.character(diversity_group_abv),
diversity_group_abv = ifelse(
grepl(pattern = "Healthy", as.character(diversity_group_abv)),
diversity_group_abv,
paste(diversity_group_abv, "Diversity")
),
diversity_group_abv = factor(diversity_group_abv, levels = c("Low Diversity", "Medium Diversity",
"High Diversity", "Healthy Donor"))
) %>%
arrange(class, subclass, p.adj, compound) %>%
pivot_wider(c(diversity_group_abv, db, patientID, Shannon), names_from = compound, values_from = heatmap_val) %>%
group_by(patientID) %>%
arrange(patientID) %>%
ungroup() %>%
select(-`NA`) %>%
distinct(patientID, .keep_all = TRUE) %>%
group_by(diversity_group_abv) %>%
arrange(db, Shannon) %>%
ungroup() %>%
select(-Shannon, -db, -diversity_group_abv) %>%
column_to_rownames("patientID") %>%
as.matrix() %>%
t() %>%
Heatmap(
name = "Fold Change (log2)",
col = col_fun,
na_col = "grey83",
rect_gp = gpar(col = "grey40", lwd = 1.5),
column_names_gp = grid::gpar(fontsize = 8),
column_gap = unit(2.5, "mm"),
column_split = heatmap_labels,
column_order = heatmap_column_order,
column_title_gp = gpar(fontsize = 14),
column_title_rot = 0,
cluster_columns = FALSE,
show_column_names = TRUE,
show_column_dend = FALSE,
row_names_gp = gpar(fontsize = 8),
row_gap = unit(3.5, "mm"),
row_names_side = c("left"),
row_order = heatmap_order$compound,
row_split = heatmap_order$subclass,
row_title_rot = 0,
cluster_rows = FALSE,
show_row_dend = FALSE,
heatmap_height = unit(13, "in"),
heatmap_width = unit(19, "in")
)
gg_metab_heatmap_tot <- gg_metab_heatmap + pvalue_adj
gg_metab_heatmap_totpdf(file = "./Results/Figure_2.pdf", height = 15, width = 26, onefile = F)
gg_metab_heatmap_tot
invisible(dev.off())# Conversions of compounds
metab_conversions <- data.frame(
compound = c(
"Taurocholic Acid",
"Glycocholic Acid",
"Cholic Acid",
"3-Oxolithocholic Acid",
"Alloisolithocholic Acid",
"Deoxycholic Acid",
"Isodeoxycholic Acid",
"Lithocholic Acid",
"Kynurenic Acid",
"Kynurenine",
"Anthranilic Acid",
"Desaminotyrosine",
"Niacin",
"Tyrosine",
"Tryptamine",
"Tryptophan",
"Phenylalanine",
"Acetate",
"Butyrate",
"Propionate",
"Succinate"
),
molar_mass__gmol = c(
515.7,
465.6,
408.6,
374.6,
376.6,
392.6,
392.6,
376.6,
189.17,
208.21,
137.14,
166.17,
123.11,
181.19,
160.22,
204.22,
165.19,
59.04,
87.1,
73.07,
116.07
)
)
metab_quant_converted <- metab_quant_anon %>%
mutate(
compound = ifelse(compound == "isovaleric-acid", "isovalerate", compound),
compound = str_to_title(compound)
) %>%
mutate(units = case_when(compound %in% c(
"Taurocholic Acid",
"Glycocholic Acid",
"Cholic Acid",
"3-Oxolithocholic Acid",
"Alloisolithocholic Acid",
"Deoxycholic Acid",
"Isodeoxycholic Acid",
"Lithocholic Acid"
) ~ "ugmL",
compound %in% c("Acetate", "Butyrate", "Succinate", "Propionate") ~ "mM",
TRUE ~ "uM")) %>%
right_join(metab_conversions, by = "compound") %>% # right_join to only keep compounds we're interested in
mutate(mvalue__mM = case_when(units == "ugmL" ~ (mvalue* #ugmL
((1000/ #1000 ml per L
1000000)/ #1000000 ug per gram
molar_mass__gmol)* #gram per mol (molar mass)
1000 #1000 mM per 1M
),
units == "uM" ~ mvalue/ #uM
1000, #1000uM per 1M
TRUE ~ mvalue #units are already in mM
)
) %>%
select(sampleID, compound, mvalue__mM)
metab_boxplot <-
metaphlan_df2 %>%
left_join(metaphlan_df_sumry) %>%
drop_na(taxid) %>%
select(sampleID, diversity_group_abv, db) %>%
group_by(sampleID) %>%
dplyr::slice(1) %>%
left_join(metab_quant_converted) %>%
mutate(compound = ifelse(compound == "isovaleric-acid", "isovalerate", compound),
compound = str_to_title(compound)) %>%
ungroup() %>%
mutate(class = case_when(compound %in% c("Taurocholic Acid", "Glycocholic Acid") ~ "Conjugated Primary Bile Acid",
compound %in% c("Cholic Acid") ~ "Primary Bile Acid",
compound %in% c("3-Oxolithocholic Acid", "Alloisolithocholic Acid", "Deoxycholic Acid", "Isodeoxycholic Acid",
"Lithocholic Acid") ~ "Secondary Bile Acid",
compound %in% c("Threonine", "Glycine", "Tyrosine", "Tyramine", "Serine", "Leucine", "Isoleucine",
"Valine", "Phenylalanine", "Alanine", "Proline", "Aspartate",
"Methionine", "Glutamate", "Lysine", "Cysteine", "Tryptophan") ~ "Amino Acid",
compound %in% c("Acetate", "Butyrate", "Succinate", "Propionate") ~ "Short-Chain Fatty Acid",
compound %in% c("Kynurenic Acid", "Anthranilic Acid", "Kynurenine", "Tryptamine") ~ "Kynurenine Metabolite",
compound == "Desaminotyrosine" ~ "Phenolic Aromatic",
compound == "Niacin" ~ "B-Vitamin",
TRUE ~ "Indole"),
compound = case_when(class == "Conjugated Primary Bile Acid" ~ paste(compound, "(1˚Conj. BA)"),
class == "Primary Bile Acid" ~ paste(compound, "(1˚ BA)"),
class == "Secondary Bile Acid" ~ paste(compound, "(2˚ BA)"),
class == "Short-Chain Fatty Acid" ~ paste(compound, "(SCFA)"),
class == "Amino Acid" ~ paste(compound, "(AA)"),
class == "Phenolic Aromatic" ~ paste(compound, "(Phen. Arom.)"),
class == "Indole" ~ paste0(compound, "(Indole)"),
class == "Kynurenine Metabolite" ~ paste(compound, "(Kyn. Metab.)"),
class == "B-Vitamin" ~ paste(compound, "(B-Vitamin)")
)) %>%
drop_na() %>%
group_by(compound) %>%
mutate(count = length(unique(diversity_group_abv))) %>%
filter(count == 4) %>%
select(-count) %>%
mutate(compound = factor(
compound,
levels = c(
"Acetate (SCFA)", # 1
"Propionate (SCFA)", # 2
"Butyrate (SCFA)", # 3
"Succinate (SCFA)", # 4
"Tyrosine (AA)", # 5
"Tryptophan (AA)", # 6
"Phenylalanine (AA)", # 7
"Cholic Acid (1˚ BA)", # 8
"Glycocholic Acid (1˚Conj. BA)", # 9
"Taurocholic Acid (1˚Conj. BA)", # 10
"Deoxycholic Acid (2˚ BA)", # 11
"Lithocholic Acid (2˚ BA)", # 12
"Isodeoxycholic Acid (2˚ BA)", # 13
"3-Oxolithocholic Acid (2˚ BA)", # 14
"Alloisolithocholic Acid (2˚ BA)", # 15
"Desaminotyrosine (Phen. Arom.)", # 16
"Kynurenine (Kyn. Metab.)", # 17
"Anthranilic Acid (Kyn. Metab.)", # 18
"Tryptamine (Kyn. Metab.)", # 19
"Niacin (B-Vitamin)" # 20
)
),
class = factor(
class,
levels = c(
"Short-Chain Fatty Acid", # 1
"Amino Acid", # 2
"Primary Bile Acid", # 3
"Conjugated Primary Bile Acid", # 4
"Secondary Bile Acid", # 5
"Indole", # 6
"Phenolic Aromatic", # 7
"Kynurenine Metabolite", # 8
"B-Vitamin" # 9
)
)) %>%
filter(compound != "Kynurenic Acid") %>%
mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor")),
diversity_group_abv = factor(diversity_group_abv, levels = c("Low", "Medium", "High", "Healthy Donor"))) %>%
filter(compound %in% c(
"Acetate (SCFA)",
"Butyrate (SCFA)",
"Propionate (SCFA)",
"Glycocholic Acid (1˚Conj. BA)",
"Taurocholic Acid (1˚Conj. BA)",
"Cholic Acid (1˚ BA)",
"Deoxycholic Acid (2˚ BA)",
"Lithocholic Acid (2˚ BA)",
"Alloisolithocholic Acid (2˚ BA)"
))
metab_boxplot_stats <-
metab_boxplot %>%
group_by(class, compound) %>%
rstatix::wilcox_test(mvalue__mM~diversity_group_abv,
comparisons = diversity_comps,
p.adjust.method = "none",
alternative= "two.sided") %>%
rstatix::adjust_pvalue(method = "BH") %>%
rstatix::add_significance("p.adj") %>%
mutate(p.adj = ifelse(p.adj < 0.001, "p.adj < 0.001", paste("p.adj = ", round(p.adj, 2)))) %>%
mutate(y.position = case_when(group1 == "High" & group2 == "Healthy Donor" ~ 2.30,
group1 == "Medium" & group2 == "High" ~ 2.7,
group1 == "Low" & group2 == "High" ~ 3.1,
group1 == "Low" & group2 == "Medium" ~ 3.5)) # Set stats brackets to fixed points since the y-scale will be log10 transformed
set.seed(123) # for consistent jittering of points
gg_metab_boxplot <-
ggboxplot(metab_boxplot,
x = "diversity_group_abv",
y = "mvalue__mM",
fill = "db",
color = "diversity_group_abv",
alpha = 0.65,
outlier.shape = NA,
facet.by = c("class", "compound")) +
theme(legend.text = et(size = 12, color = "black"),
legend.title = et(size = 14, color = "black"),
axis.title.x = eb(),
axis.title.y = et(size = 12, color = "black"),
panel.border = eb(),
strip.background = er(colour="white", fill="white"),
) +
geom_hline(yintercept = 0) +
geom_segment(aes(x = 0.35, y = 0, xend = 0.35, yend = Inf)) +
facet_wrap(~compound, scales = "fixed") +
stat_pvalue_manual(metab_boxplot_stats,
tip.length = 0.015) +
geom_point(
data = metab_boxplot,
aes(x = diversity_group_abv, y = mvalue__mM, color = diversity_group_abv),
position = position_jitter(width = 0.2),
size = 2,
alpha = 0.65) +
scale_fill_manual("Cohort", values = rev(pirate_colors)) +
scale_color_manual("Diversity Group", values = diversity_group_colors) +
scale_y_log10(limits = c(0.01, 5000),
labels = c(0.01, 0.1, 1, 10, 100, 1000),
breaks = c(0.01, 0.1, 1, 10, 100, 1000),
expand = expansion(mult = c(0.1, 0.2))) +
ylab("Concentration (mM)\n")
gg_metab_boxplotcairo_pdf(filename = "./Results/Figure_3.pdf", width = 14, height = 10, onefile = FALSE)
gg_metab_boxplot
invisible(dev.off())gg_ecoc_all_patients <- peri_matrix_all %>%
distinct(sampleID, .keep_all = T) %>%
select(sampleID, patientID, bact_infection_present) %>%
mutate(bact_infection_present = ifelse(bact_infection_present ==
"No", "No Infection", "Infection"), bact_infection_present = factor(bact_infection_present,
levels = c("Infection", "No Infection"))) %>%
left_join(metaphlan_peri_anon %>%
select(-bact_infection_present) %>%
group_by(patientID, sampleID) %>%
filter(grepl(x = Species, pattern = "Enterococcus", ignore.case = T)) %>%
count(sampleID, wt = pctseqs, name = "enterococcus_rel_abundance")) %>%
left_join(metaphlan_peri_anon %>%
select(patientID, sampleID, Kingdom:pctseqs)) %>%
ungroup() %>%
arrange(Kingdom, Phylum, Class, Order, Family, Genus) %>%
mutate(Genus = paste0(Phylum, "-", Order, "-", Family, "-",
Genus)) %>%
group_by(sampleID) %>%
arrange(Genus) %>%
mutate(cum.pct = cumsum(pctseqs), y.text = (cum.pct + c(0,
cum.pct[-length(cum.pct)]))/2) %>%
ungroup() %>%
dplyr::select(-cum.pct) %>%
mutate(sampleID = ifelse(is.na(sampleID), patientID, sampleID),
Genus = factor(Genus, levels = unique(Genus)), Genus = fct_relevel(Genus,
"Firmicutes-Lactobacillales-Enterococcaceae-Enterococcus",
after = Inf)) %>%
arrange(-enterococcus_rel_abundance) %>%
group_by(sampleID) %>%
ggplot(aes(x = reorder(sampleID, -enterococcus_rel_abundance),
y = pctseqs)) + geom_bar(aes(fill = Genus), stat = "identity") +
theme_bw() + theme(legend.position = "none", axis.text.x = et(angle = 90,
hjust = 0.5), strip.text.x = et(angle = 0, size = 14), strip.background = eb(),
axis.title.y = et(color = "black", size = 12), axis.text.y = et(color = "black",
size = 10)) + scale_fill_manual(values = metaphlan_pal2) +
scale_y_continuous(expand = expansion(mult = c(0.005, 0.005))) +
ylab("Shotgun Relative Abundance (%)\n") + xlab("") + facet_grid(~bact_infection_present,
space = "free_x", scales = "free_x")
# gg_ecoc_all_patients
# Discrete heatmap of infections
ecoc_infx_orgs <- peri_criteria_all %>%
filter(sampleID %in% peri_matrix_all$sampleID) %>%
group_by(patientID, eday) %>%
arrange(-infx_stool) %>%
dplyr::slice(1) %>%
ungroup() %>%
select(patientID, sampleID, bact_infection_present, infx_stool,
organism1, micro1.factor) %>%
distinct() %>%
mutate(organism1 = gsub(x = organism1, pattern = "\\s+",
replacement = ""), organism1 = str_to_lower(string = organism1),
organism1 = ifelse(grepl(x = organism1, pattern = "enterococcus|enterobacterales|klebsiella|escherichia|citrobacter|proteus|staphyl|clostrid|pseudo|steno|bacteroides|helico"),
organism1, "Culture Negative")) %>%
group_by(patientID, sampleID, infx_stool) %>%
mutate(`Enterococcus faecium` = grepl(x = organism1, pattern = "enterococcusfaecium",
ignore.case = T), `Enterococcus faecalis` = grepl(x = organism1,
pattern = "enterococcusfaecalis", ignore.case = T), `Enterococcus avium` = grepl(x = organism1,
pattern = "enterococcusavium", ignore.case = T), `Enterococcus gallinarum` = grepl(x = organism1,
pattern = "enterococcusgallinarum", ignore.case = T),
`Klebsiella pneumoniae` = grepl(x = organism1, pattern = "klebsiellapneumoniae",
ignore.case = T), `Enterobacter cloaceae` = grepl(x = organism1,
pattern = "enterobactercloaceae", ignore.case = T),
`Escherichia coli` = grepl(x = organism1, pattern = "escherichiacoli",
ignore.case = T), `Citrobacter freundii` = grepl(x = organism1,
pattern = "citrobacterfreundii", ignore.case = T),
`Proteus mirabilis` = grepl(x = organism1, pattern = "proteusmirabilis",
ignore.case = T), `Staphylococcus aureus` = grepl(x = organism1,
pattern = "staphylococcusaureus", ignore.case = T),
`Staphylococcus epidermis` = grepl(x = organism1, pattern = "staphylococcusepidermidis",
ignore.case = T), `Pseudomonas aeruginosa` = grepl(x = organism1,
pattern = "pseudomonasaeruginosa", ignore.case = T),
`Stenotrophmonas maltophilia` = grepl(x = organism1,
pattern = "stenotrophmonasmaltophilia", ignore.case = T),
`Helicobacter pylori` = grepl(x = organism1, pattern = "helicobacterpylori",
ignore.case = T), `Clostridium difficile` = grepl(x = organism1,
pattern = "clostridiumdifficile|clostridioidesdifficile",
ignore.case = T), `Bacteroides sp.` = grepl(x = organism1,
pattern = "bacteroides", ignore.case = T), `Culture Negative` = grepl(x = organism1,
pattern = "Culture Negative", ignore.case = T)) %>%
pivot_longer(-c(patientID:micro1.factor), names_to = "organisms",
values_to = "org_presence") %>%
mutate(organisms = ifelse(bact_infection_present == "No",
"No Bacterial Infection", organisms), org_presence = ifelse(org_presence ==
TRUE, 1, 0)) %>%
group_by(sampleID, infx_stool, bact_infection_present, organisms) %>%
dplyr::slice_max(org_presence) %>%
ungroup() %>%
filter(org_presence == 1) %>%
left_join(metaphlan_peri_anon %>%
select(-bact_infection_present) %>%
group_by(patientID, sampleID) %>%
filter(grepl(x = Species, pattern = "Enterococcus", ignore.case = T)) %>%
count(sampleID, wt = pctseqs, name = "enterococcus_rel_abundance")) %>%
left_join(metaphlan_peri_anon %>%
select(patientID, sampleID, Kingdom:pctseqs)) %>%
group_by(patientID, sampleID, organisms, org_presence) %>%
dplyr::slice(1) %>%
mutate(sampleID = ifelse(is.na(sampleID), patientID, sampleID)) %>%
mutate(org_presence = ifelse(grepl(pattern = "No", x = bact_infection_present,
ignore.case = T), 0, org_presence)) %>%
ungroup() %>%
mutate(organisms = ifelse(bact_infection_present == "Yes" &
org_presence == 0, "Other", organisms)) %>%
arrange(-enterococcus_rel_abundance) %>%
mutate(organisms = ifelse(grepl(x = organisms, pattern = "enterococcus|klebsiella|escherichia|proteus|citrobacter|culture",
ignore.case = TRUE), organisms, "Other Bacterial Infection"),
organisms = ifelse(bact_infection_present == "No", "No Bacterial Infection",
organisms))
ecoc_infx_orgs_order <- ecoc_infx_orgs %>%
group_by(sampleID, patientID, organisms) %>%
distinct(sampleID, patientID, .keep_all = T) %>%
ungroup() %>%
mutate(organisms = ifelse(grepl(organisms, pattern = "gallinarum"),
"Other Bacterial Infection", organisms), org_colors = case_when(grepl(x = organisms,
pattern = "enterococcus faecium", ignore.case = T) ~
1, grepl(x = organisms, pattern = "enterococcus faecalis",
ignore.case = T) ~ 2, grepl(x = organisms, pattern = "enterococcus avium",
ignore.case = T) ~ 3, grepl(x = organisms, pattern = "klebsiella pneumoniae",
ignore.case = T) ~ 4, grepl(x = organisms, pattern = "escherichia coli",
ignore.case = T) ~ 5, grepl(x = organisms, pattern = "proteus mirabilis",
ignore.case = T) ~ 6, grepl(x = organisms, pattern = "citrobacter freundii",
ignore.case = T) ~ 7, grepl(x = organisms, pattern = "other bacterial infection|gallinarum",
ignore.case = T) ~ 8, grepl(x = organisms, pattern = "culture negative",
ignore.case = T) ~ 9, TRUE ~ 0), organisms = as.factor(organisms),
organisms = factor(organisms, levels = c("Enterococcus faecium",
"Enterococcus faecalis", "Enterococcus avium", "Klebsiella pneumoniae",
"Escherichia coli", "Proteus mirabilis", "Citrobacter freundii",
"Other Bacterial Infection", "Culture Negative",
"No Bacterial Infection")), org_colors = factor(org_colors,
levels = c("1", "2", "3", "4", "5", "6", "7", "8",
"9", "0"))) %>%
left_join(ecoc_infx_orgs) %>%
mutate(organisms = factor(organisms, levels = c("Enterococcus faecium",
"Enterococcus faecalis", "Enterococcus avium", "Klebsiella pneumoniae",
"Escherichia coli", "Proteus mirabilis", "Citrobacter freundii",
"Other Bacterial Infection", "Culture Negative", "No Bacterial Infection")),
org_colors = factor(org_colors, levels = c("1", "2",
"3", "4", "5", "6", "7", "8", "9", "0"))) %>%
drop_na(organisms) %>%
mutate(bact_infection_present = ifelse(bact_infection_present ==
"No", "No Infection", "Infection"), bact_infection_present = factor(bact_infection_present,
levels = c("Infection", "No Infection")))
gg_ecoc_infx_orgs <- ecoc_infx_orgs_order %>%
droplevels() %>%
ungroup() %>%
ggplot(., aes(x = reorder(sampleID, -enterococcus_rel_abundance),
y = organisms, fill = as.factor(org_colors))) + geom_tile(color = "black") +
theme_bw() + theme(axis.title.y = et(color = "black", size = 12),
axis.text.y = et(color = "black", size = 10), axis.ticks.x = eb(),
axis.text.x = eb(), axis.title.x = eb(), panel.grid = eb(),
strip.text = eb()) + scale_fill_manual(values = c("#129246",
"#0C7A3A", "#08592B", "#FF0000", "#CC0404", "#8A0202", "#5C0202",
"#E6C66E", "#BD992D", "#00000000"), labels = c("Enterococcus faecium",
"Enterococcus faecalis", "Enterococcus avium", "Klebsiella pneumoniae",
"Escherichia coli", "Citrobacter freundii", "Proteus mirabilis",
"Other Bacterial Infection", "Culture Negative", "No Bacterial Infection")) +
labs(fill = "Infecting Organism", y = "Infecting Organism\n") +
scale_y_discrete(expand = expansion(mult = c(0.005, 0.005)),
limits = rev(levels(ecoc_infx_orgs_order$organisms))) +
facet_grid(. ~ bact_infection_present, space = "free_x",
scales = "free_x")
# gg_ecoc_infx_orgs
yingtools2::gg.stack(gg_ecoc_all_patients, gg_ecoc_infx_orgs,
heights = c(1, 0.5), adjust.themes = T)pdf(file = "./Results/Figure_4A.pdf", height = 8, width = 15,
onefile = F)
yingtools2::gg.stack(gg_ecoc_all_patients, gg_ecoc_infx_orgs,
heights = c(1, 0.5), adjust.themes = T)
invisible(dev.off())gg_ebac_all_patients <- peri_matrix_all %>%
distinct(sampleID, .keep_all = T) %>%
select(sampleID, patientID, bact_infection_present) %>%
mutate(bact_infection_present = ifelse(bact_infection_present ==
"No", "No Infection", "Infection"), bact_infection_present = factor(bact_infection_present,
levels = c("Infection", "No Infection"))) %>%
left_join(metaphlan_peri_anon %>%
select(-bact_infection_present) %>%
group_by(patientID, sampleID) %>%
filter(grepl(x = Species, pattern = "Klebsiella pneumoniae|Escherichia coli|Citrobacter freundii|Proteus mirabilis")) %>%
count(sampleID, wt = pctseqs, name = "enterobacterales_rel_abundance")) %>%
left_join(metaphlan_peri_anon %>%
select(patientID, sampleID, Kingdom:pctseqs)) %>%
ungroup() %>%
arrange(Kingdom, Phylum, Class, Order, Family, Genus) %>%
mutate(Genus = paste0(Phylum, "-", Order, "-", Family, "-",
Genus)) %>%
group_by(sampleID) %>%
arrange(Genus) %>%
mutate(cum.pct = cumsum(pctseqs), y.text = (cum.pct + c(0,
cum.pct[-length(cum.pct)]))/2) %>%
ungroup() %>%
dplyr::select(-cum.pct) %>%
mutate(sampleID = ifelse(is.na(sampleID), patientID, sampleID),
Genus = factor(Genus, levels = unique(Genus)), Genus = fct_relevel(Genus,
c("Proteobacteria-Enterobacterales-Enterobacteriaceae-Citrobacter",
"Proteobacteria-Enterobacterales-Enterobacteriaceae-Enterobacter",
"Proteobacteria-Enterobacterales-Enterobacteriaceae-Escherichia",
"Proteobacteria-Enterobacterales-Enterobacteriaceae-Klebsiella",
"Proteobacteria-Enterobacterales-Morganellaceae-Proteus"),
after = Inf)) %>%
arrange(-enterobacterales_rel_abundance) %>%
ggplot(aes(x = reorder(sampleID, -enterobacterales_rel_abundance),
y = pctseqs)) + geom_bar(aes(fill = Genus), stat = "identity") +
theme_bw() + theme(legend.position = "none", axis.text.x = et(angle = 90,
hjust = 0.5), strip.text.x = et(angle = 0, size = 14), strip.background = eb(),
axis.title.y = et(color = "black", size = 12), axis.text.y = et(color = "black",
size = 10)) + scale_fill_manual(values = metaphlan_pal2) +
scale_y_continuous(expand = expansion(mult = c(0.005, 0.005))) +
ylab("Shotgun Relative Abundance (%)\n") + xlab("") + facet_grid(~bact_infection_present,
space = "free_x", scales = "free_x")
# gg_ebac_all_patients
# Discrete heatmap of infections
ebac_infx_orgs <- peri_criteria_all %>%
filter(sampleID %in% peri_matrix_all$sampleID) %>%
group_by(patientID, eday) %>%
arrange(-infx_stool) %>%
dplyr::slice(1) %>%
ungroup() %>%
select(patientID, sampleID, bact_infection_present, infx_stool,
organism1, micro1.factor) %>%
distinct() %>%
mutate(organism1 = gsub(x = organism1, pattern = "\\s+",
replacement = ""), organism1 = str_to_lower(string = organism1),
organism1 = ifelse(grepl(x = organism1, pattern = "enterococcus|enterobacterales|klebsiella|escherichia|citrobacter|proteus|staphyl|clostrid|pseudo|steno|bacteroides|helico"),
organism1, "Culture Negative")) %>%
group_by(patientID, sampleID, infx_stool) %>%
mutate(`Enterococcus faecium` = grepl(x = organism1, pattern = "enterococcusfaecium",
ignore.case = T), `Enterococcus faecalis` = grepl(x = organism1,
pattern = "enterococcusfaecalis", ignore.case = T), `Enterococcus avium` = grepl(x = organism1,
pattern = "enterococcusavium", ignore.case = T), `Enterococcus gallinarum` = grepl(x = organism1,
pattern = "enterococcusgallinarum", ignore.case = T),
`Klebsiella pneumoniae` = grepl(x = organism1, pattern = "klebsiellapneumoniae",
ignore.case = T), `Enterobacter cloaceae` = grepl(x = organism1,
pattern = "enterobactercloaceae", ignore.case = T),
`Escherichia coli` = grepl(x = organism1, pattern = "escherichiacoli",
ignore.case = T), `Citrobacter freundii` = grepl(x = organism1,
pattern = "citrobacterfreundii", ignore.case = T),
`Proteus mirabilis` = grepl(x = organism1, pattern = "proteusmirabilis",
ignore.case = T), `Staphylococcus aureus` = grepl(x = organism1,
pattern = "staphylococcusaureus", ignore.case = T),
`Staphylococcus epidermis` = grepl(x = organism1, pattern = "staphylococcusepidermidis",
ignore.case = T), `Pseudomonas aeruginosa` = grepl(x = organism1,
pattern = "pseudomonasaeruginosa", ignore.case = T),
`Stenotrophmonas maltophilia` = grepl(x = organism1,
pattern = "stenotrophmonasmaltophilia", ignore.case = T),
`Helicobacter pylori` = grepl(x = organism1, pattern = "helicobacterpylori",
ignore.case = T), `Clostridium difficile` = grepl(x = organism1,
pattern = "clostridiumdifficile|clostridioidesdifficile",
ignore.case = T), `Bacteroides sp.` = grepl(x = organism1,
pattern = "bacteroides", ignore.case = T), `Culture Negative` = grepl(x = organism1,
pattern = "Culture Negative", ignore.case = T)) %>%
pivot_longer(-c(patientID:micro1.factor), names_to = "organisms",
values_to = "org_presence") %>%
mutate(organisms = ifelse(bact_infection_present == "No",
"No Bacterial Infection", organisms), org_presence = ifelse(org_presence ==
TRUE, 1, 0)) %>%
group_by(sampleID, infx_stool, bact_infection_present, organisms) %>%
dplyr::slice_max(org_presence) %>%
ungroup() %>%
filter(org_presence == 1) %>%
left_join(metaphlan_peri_anon %>%
select(-bact_infection_present) %>%
group_by(patientID, sampleID) %>%
filter(grepl(x = Species, pattern = "Klebsiella pneumoniae|Escherichia coli|Citrobacter freundii|Proteus mirabilis")) %>%
count(sampleID, wt = pctseqs, name = "enterobacterales_rel_abundance")) %>%
left_join(metaphlan_peri_anon %>%
select(patientID, sampleID, Kingdom:pctseqs)) %>%
group_by(patientID, sampleID, organisms, org_presence) %>%
dplyr::slice(1) %>%
mutate(sampleID = ifelse(is.na(sampleID), patientID, sampleID)) %>%
mutate(org_presence = ifelse(grepl(pattern = "No", x = bact_infection_present,
ignore.case = T), 0, org_presence)) %>%
ungroup() %>%
mutate(organisms = ifelse(bact_infection_present == "Yes" &
org_presence == 0, "Other", organisms)) %>%
arrange(-enterobacterales_rel_abundance) %>%
mutate(organisms = ifelse(grepl(x = organisms, pattern = "enterococcus|klebsiella|escherichia|proteus|citrobacter|culture",
ignore.case = TRUE), organisms, "Other Bacterial Infection"),
organisms = ifelse(bact_infection_present == "No", "No Bacterial Infection",
organisms))
ebac_infx_orgs_order <- ebac_infx_orgs %>%
group_by(sampleID, patientID, organisms) %>%
distinct(sampleID, patientID, .keep_all = T) %>%
ungroup() %>%
mutate(organisms = ifelse(grepl(organisms, pattern = "gallinarum"),
"Other Bacterial Infection", organisms), org_colors = case_when(grepl(x = organisms,
pattern = "enterococcus faecium", ignore.case = T) ~
1, grepl(x = organisms, pattern = "enterococcus faecalis",
ignore.case = T) ~ 2, grepl(x = organisms, pattern = "enterococcus avium",
ignore.case = T) ~ 3, grepl(x = organisms, pattern = "klebsiella pneumoniae",
ignore.case = T) ~ 4, grepl(x = organisms, pattern = "escherichia coli",
ignore.case = T) ~ 5, grepl(x = organisms, pattern = "proteus mirabilis",
ignore.case = T) ~ 6, grepl(x = organisms, pattern = "citrobacter freundii",
ignore.case = T) ~ 7, grepl(x = organisms, pattern = "other bacterial infection|gallinarum",
ignore.case = T) ~ 8, grepl(x = organisms, pattern = "culture negative",
ignore.case = T) ~ 9, TRUE ~ 0), organisms = as.factor(organisms),
organisms = factor(organisms, levels = c("Klebsiella pneumoniae",
"Escherichia coli", "Proteus mirabilis", "Citrobacter freundii",
"Enterococcus faecium", "Enterococcus faecalis",
"Enterococcus avium", "Other Bacterial Infection",
"Culture Negative", "No Bacterial Infection")), org_colors = factor(org_colors,
levels = c("4", "5", "6", "7", "1", "2", "3", "8",
"9", "0"))) %>%
left_join(ebac_infx_orgs) %>%
mutate(organisms = factor(organisms, levels = c("Klebsiella pneumoniae",
"Escherichia coli", "Proteus mirabilis", "Citrobacter freundii",
"Enterococcus faecium", "Enterococcus faecalis", "Enterococcus avium",
"Other Bacterial Infection", "Culture Negative", "No Bacterial Infection")),
org_colors = factor(org_colors, levels = c("4", "5",
"6", "7", "1", "2", "3", "8", "9", "0"))) %>%
drop_na(organisms) %>%
mutate(bact_infection_present = ifelse(bact_infection_present ==
"No", "No Infection", "Infection"), bact_infection_present = factor(bact_infection_present,
levels = c("Infection", "No Infection")))
gg_ebac_infx_orgs <- ebac_infx_orgs_order %>%
ggplot(., aes(x = reorder(sampleID, -enterobacterales_rel_abundance),
y = organisms, fill = as.factor(org_colors))) + geom_tile(color = "black") +
theme_bw() + theme(axis.title.y = et(color = "black", size = 12),
axis.text.y = et(color = "black", size = 10), axis.ticks.x = eb(),
axis.text.x = eb(), axis.title.x = eb(), panel.grid = eb(),
strip.text = eb()) + scale_fill_manual(values = c("#FF0000",
"#CC0404", "#8A0202", "#5C0202", "#129246", "#0C7A3A", "#08592B",
"#E6C66E", "#BD992D", "#00000000"), labels = c("Klebsiella pneumoniae",
"Escherichia coli", "Citrobacter freundii", "Proteus mirabilis",
"Enterococcus faecium", "Enterococcus faecalis", "Enterococcus avium",
"Other Bacterial Infection", "Culture Negative", "No Bacterial Infection")) +
labs(fill = "Infecting Organism", y = "Infecting Organism\n") +
scale_y_discrete(expand = expansion(mult = c(0.005, 0.005)),
limits = rev(levels(ebac_infx_orgs_order$organisms))) +
facet_grid(. ~ bact_infection_present, space = "free_x",
scales = "free_x")
# gg_ebac_infx_orgs
yingtools2::gg.stack(gg_ebac_all_patients, gg_ebac_infx_orgs,
heights = c(1, 0.5), adjust.themes = T)pdf(file = "./Results/Figure_4B.pdf", height = 8, width = 15,
onefile = F)
yingtools2::gg.stack(gg_ebac_all_patients, gg_ebac_infx_orgs,
heights = c(1, 0.5), adjust.themes = T)
invisible(dev.off())# Qual compounds that we can quantify and that are also
# significant in the volcano analysis for both
signif_compounds <- qual_tot_ecoc_expan %>%
rownames_to_column(var = "compound") %>%
filter(p.adj <= 0.05 & abs(log2fc_val) > 0.75) %>%
distinct(compound)
metab_boxplot_ecoc_expan <- peri_matrix_all %>%
select(sampleID, enterococcus_rel_abundance) %>%
left_join(sample_lookup) %>%
group_by(sampleID) %>%
slice(1) %>%
left_join(metab_quant_converted %>%
filter(compound %in% signif_compounds$compound)) %>%
filter(compound %!in% c("Kynurenine", "Anthranilic Acid")) %>%
mutate(compound = ifelse(compound == "isovaleric-acid", "isovalerate",
compound), compound = str_to_title(compound)) %>%
ungroup() %>%
mutate(db = ifelse(db == "HealthyDonor", "Healthy Donor",
"Liver Transplant") %>%
factor(., levels = c("Healthy Donor", "Liver Transplant")),
class = case_when(compound %in% c("Taurocholic Acid",
"Glycocholic Acid") ~ "Conjugated Primary Bile Acid",
compound %in% c("Cholic Acid") ~ "Primary Bile Acid",
compound %in% c("3-Oxolithocholic Acid", "Alloisolithocholic Acid",
"Deoxycholic Acid", "Isodeoxycholic Acid", "Lithocholic Acid") ~
"Secondary Bile Acid", compound %in% c("Threonine",
"Glycine", "Tyrosine", "Tyramine", "Serine",
"Leucine", "Isoleucine", "Valine", "Phenylalanine",
"Alanine", "Proline", "Aspartate", "Methionine",
"Glutamate", "Lysine", "Cysteine", "Tryptophan") ~
"Amino Acid", compound %in% c("Acetate", "Butyrate",
"Succinate", "Propionate") ~ "Short-Chain Fatty Acid",
compound %in% c("Kynurenic Acid", "Anthranilic Acid",
"Kynurenine", "Tryptamine") ~ "Kynurenine Metabolite",
compound == "Desaminotyrosine" ~ "Phenolic Aromatic",
compound == "Niacin" ~ "B-Vitamin", TRUE ~ "Indole")) %>%
drop_na() %>%
group_by(compound) %>%
mutate(class = factor(class, levels = c("Conjugated Primary Bile Acid",
"Primary Bile Acid", "Secondary Bile Acid", "Short-Chain Fatty Acid",
"Amino Acid", "Phenolic Aromatic", "Indole", "Kynurenine Metabolite",
"B-Vitamin")), compound = case_when(class == "Conjugated Primary Bile Acid" ~
paste(compound, "(1˚Conj. BA)"), class == "Primary Bile Acid" ~
paste(compound, "(1˚ BA)"), class == "Secondary Bile Acid" ~
paste(compound, "(2˚ BA)"), class == "Short-Chain Fatty Acid" ~
paste(compound, "(SCFA)"), class == "Amino Acid" ~ paste(compound,
"(AA)"), class == "Phenolic Aromatic" ~ paste(compound,
"(Phen. Arom.)"), class == "Indole" ~ paste0(compound,
"(Indole)"), class == "Kynurenine Metabolite" ~ paste(compound,
"(Kyn. Metab.)"), class == "B-Vitamin" ~ paste(compound,
"(B-Vitamin)")), compound = factor(compound, levels = c("Acetate (SCFA)",
"Butyrate (SCFA)", "Propionate (SCFA)", "Succinate (SCFA)",
"Taurocholic Acid (1˚Conj. BA)", "Glycocholic Acid (1˚Conj. BA)",
"Cholic Acid (1˚ BA)", "3-Oxolithocholic Acid (2˚ BA)",
"Alloisolithocholic Acid (2˚ BA)", "Deoxycholic Acid (2˚ BA)",
"Isodeoxycholic Acid (2˚ BA)", "Lithocholic Acid (2˚ BA)",
"Kynurenic Acid (Kyn. Metab.)", "Kynurenine (Kyn. Metab.)",
"Anthranilic Acid (Kyn. Metab.)", "Desaminotyrosine",
"Niacin", "Tyrosine", "Tryptamine", "Tryptophan", "Phenylalanine"))) %>%
ungroup() %>%
drop_na() %>%
mutate(enterococcus_expansion = ifelse(enterococcus_rel_abundance >=
optimal_cutpoint_rel$optimal_cutpoint[2], 1, 0)) %>%
arrange(enterococcus_expansion, sampleID) %>%
group_by(compound) %>%
mutate(enterococcus_expansion_0 = length(mvalue__mM[enterococcus_expansion ==
"0"]), enterococcus_expansion_1 = length(mvalue__mM[enterococcus_expansion ==
"1"])) %>%
filter(any(mvalue__mM != 0)) %>%
mutate(enterococcus_expansion = ifelse(enterococcus_expansion ==
"1", "Expansion", "No Expansion"))
metab_boxplot_summary_stats_ecoc_expan <- metab_boxplot_ecoc_expan %>%
group_by(compound) %>%
summarise(y.position = max(mvalue__mM) * 1.1)
metab_boxplot_stats_ecoc_expan <- metab_boxplot_ecoc_expan %>%
group_by(class, compound) %>%
rstatix::wilcox_test(mvalue__mM ~ enterococcus_expansion) %>%
rstatix::adjust_pvalue(method = "BH") %>%
rstatix::add_significance("p.adj") %>%
mutate(p.adj = ifelse(p.adj < 0.001, "p.adj < 0.001", paste("p.adj = ",
round(p.adj, 3)))) %>%
add_xy_position(x = "enterococcus_expansion") %>%
select(-y.position) %>%
left_join(metab_boxplot_summary_stats_ecoc_expan)
gg_metab_boxplot_ecoc_expan <- ggboxplot(metab_boxplot_ecoc_expan,
x = "enterococcus_expansion", y = "mvalue__mM", fill = "enterococcus_expansion",
alpha = 0.65, outlier.shape = NA) + theme(legend.text = et(size = 14,
color = "black"), legend.title = et(size = 14, color = "black"),
axis.title.x = eb(), axis.title.y = et(size = 16, color = "black"),
strip.text = et(size = 14, color = "black"), strip.background = eb(),
panel.border = eb(), axis.line = eb()) + facet_wrap(~compound,
scales = "free_y", nrow = 2) + annotate("segment", x = 0.35,
xend = 0.35, y = 0, yend = Inf, colour = "black", linewidth = 1) +
annotate("segment", x = 0.35, xend = Inf, y = 0, yend = 0,
colour = "black") + stat_pvalue_manual(metab_boxplot_stats_ecoc_expan,
label = "{p.adj}", bracket.size = 0) + geom_point(data = metab_boxplot_ecoc_expan,
aes(x = enterococcus_expansion, y = mvalue__mM, fill = enterococcus_expansion),
position = position_jitter(width = 0.2), size = 2, shape = 21,
alpha = 0.65, color = "black") + scale_fill_manual("Enterococcus",
values = c("gray87", "#389458")) + scale_y_continuous(expand = expansion(mult = c(0.1,
0.2))) + ylab("Concentration (mM)\n")
gg_metab_boxplot_ecoc_expancairo_pdf(filename = "./Results/Figure_5B.pdf", height = 8, width = 16,
onefile = TRUE)
gg_metab_boxplot_ecoc_expan
invisible(dev.off())diversity_metab_mat <-
metab_qual_anon %>% filter(sampleID %in% first_samps$sampleID) %>%
mutate(compound = ifelse(compound == "isovaleric-acid", "isovalerate", compound),
compound = str_to_title(compound)) %>%
filter(compound %in% heatmap_cmpds$compound|is.na(compound)) %>%
ungroup() %>%
left_join(metaphlan_df_sumry %>% select(sampleID, diversity_group_abv)) %>%
group_by(sampleID, compound, diversity_group_abv) %>%
summarise(mvalue = mean(mvalue, na.rm = TRUE)) %>%
ungroup() %>%
mutate_all(~replace(., is.nan(.), NA)) %>%
select(sampleID, compound, mvalue, diversity_group_abv) %>%
drop_na(sampleID) %>%
pivot_wider(names_from = "compound", values_from = "mvalue") %>%
filter(sampleID != "") %>%
column_to_rownames(var = "sampleID") %>%
select(-`NA`, - diversity_group_abv) %>%
filter_all(any_vars(!is.na(.)))
diversity_metab_labs <-
diversity_metab_mat %>%
rownames_to_column(var = "sampleID") %>%
left_join(metaphlan_df_sumry %>% select(sampleID, diversity_group_abv)) %>%
droplevels() %>%
pull(diversity_group_abv)
dim(diversity_metab_mat) #123 93 (means 93 compounds and 102 LT patients/infections)## [1] 102 93
length(diversity_metab_labs) #123 (means 102 LT patients/infections)## [1] 102
# Begin model training
set.seed(1234)
diversity_train <- sample(1:nrow(diversity_metab_mat), as.integer(0.7*nrow(diversity_metab_mat))) # randomly select 70% samples in training
diversity_test <- setdiff(1:nrow(diversity_metab_mat), diversity_train) # rest is part of the test set
# store matrices into training and test set:
diversity_metab_mat.train <- diversity_metab_mat[diversity_train, ]
diversity_metab_mat.test <- diversity_metab_mat[diversity_test,]
diversity_metab_labs.train <- diversity_metab_labs[diversity_train]
diversity_metab_labs.test <- diversity_metab_labs[diversity_test]
# Train the model to tune hyperparameters
# Initial model to find optimal number of components to include
set.seed(1234)
diversity_train_splsda <- mixOmics::splsda(diversity_metab_mat.train, diversity_metab_labs.train, ncomp = 5)
# Performance assessment
## 5-fold, 100-repeat cross validation
set.seed(1234)
diversity_train_plsda_perf <-
perf(
diversity_train_splsda,
validation = "Mfold",
folds = 5,
progressBar = FALSE,
auc = TRUE,
nrepeat = 50
)
plot(
diversity_train_plsda_perf,
col = color.mixo(5:7),
sd = FALSE,
auc = TRUE,
legend.position = "horizontal"
) # ncomp = 4 might be best for classification error rate and max.dist# Number of optimal variables to select for each component
diversity_train_keepX <- c(1:10, seq(20, 130, 10))
set.seed(123)
diversity_train_tune_splsda <-
mixOmics::tune.splsda(
diversity_metab_mat.train,
diversity_metab_labs.train,
ncomp = 4, # Choose 4 components (max) to be safe
validation = 'Mfold',
folds = 5,
dist = 'max.dist',
progressBar = FALSE,
auc = TRUE,
measure = "BER",
test.keepX = diversity_train_keepX,
nrepeat = 50
)
plot(diversity_train_tune_splsda, col = color.jet(4))diversity_train_error <- diversity_train_tune_splsda$error.rate
diversity_train_ncomp <- diversity_train_tune_splsda$choice.ncomp$ncomp # optimal number of components based on t-tests on the error rate
diversity_train_ncomp #2 components are optimal## [1] 2
diversity_train_select_keepX <- diversity_train_tune_splsda$choice.keepX[1:diversity_train_ncomp] # optimal number of variables to select per component
diversity_train_select_keepX## comp1 comp2
## 60 10
# Final Model
diversity_train_splsda_final <-
mixOmics::splsda(diversity_metab_mat.train, diversity_metab_labs.train, ncomp = diversity_train_ncomp, keepX = diversity_train_select_keepX)
# Test the model
diversity_predict_train_splsda_final <- predict(diversity_train_splsda_final, diversity_metab_mat.test,
dist = "max.dist")
diversity_predict_train_comp2 <- diversity_predict_train_splsda_final$class$max.dist[,diversity_train_ncomp]
diversity_union <- union(diversity_predict_train_comp2, diversity_metab_labs.test)
confusionMatrix(table(factor(diversity_predict_train_comp2, diversity_union,
levels = c("Low", "Medium", "High")),
factor(diversity_metab_labs.test, diversity_union,
levels = c("Low", "Medium", "High"))))## Confusion Matrix and Statistics
##
##
## Low High Medium
## Low 8 7 5
## High 0 3 2
## Medium 0 1 5
##
## Overall Statistics
##
## Accuracy : 0.5161
## 95% CI : (0.3306, 0.6985)
## No Information Rate : 0.3871
## P-Value [Acc > NIR] : 0.099557
##
## Kappa : 0.3101
##
## Mcnemar's Test P-Value : 0.006324
##
## Statistics by Class:
##
## Class: Low Class: High Class: Medium
## Sensitivity 1.0000 0.27273 0.4167
## Specificity 0.4783 0.90000 0.9474
## Pos Pred Value 0.4000 0.60000 0.8333
## Neg Pred Value 1.0000 0.69231 0.7200
## Prevalence 0.2581 0.35484 0.3871
## Detection Rate 0.2581 0.09677 0.1613
## Detection Prevalence 0.6452 0.16129 0.1935
## Balanced Accuracy 0.7391 0.58636 0.6820
diversity_train_background <- background.predict(diversity_train_splsda_final,
comp.predicted = 2,
xlim = c(-20,20),
ylim = c(-20,20),
dist = "centroids.dist")
# Model metrics for all samples
diversity_tot <- predict(diversity_train_splsda_final,
diversity_metab_mat,
dist = "max.dist")
diversity_tot_predict <- diversity_tot$class$max.dist[,diversity_train_ncomp]
diversity_tot_union <- union(diversity_tot_predict, diversity_metab_labs)
diversity_cm <- confusionMatrix(table(factor(diversity_tot_predict, diversity_tot_union,
levels = c("Low", "Medium", "High")),
factor(diversity_metab_labs, diversity_tot_union,
levels = c("Low", "Medium", "High"))),
)
diversity_cm## Confusion Matrix and Statistics
##
##
## Low High Medium
## Low 34 16 7
## High 2 21 3
## Medium 1 2 16
##
## Overall Statistics
##
## Accuracy : 0.6961
## 95% CI : (0.5971, 0.7833)
## No Information Rate : 0.3824
## P-Value [Acc > NIR] : 1.375e-10
##
## Kappa : 0.5341
##
## Mcnemar's Test P-Value : 0.001377
##
## Statistics by Class:
##
## Class: Low Class: High Class: Medium
## Sensitivity 0.9189 0.5385 0.6154
## Specificity 0.6462 0.9206 0.9605
## Pos Pred Value 0.5965 0.8077 0.8421
## Neg Pred Value 0.9333 0.7632 0.8795
## Prevalence 0.3627 0.3824 0.2549
## Detection Rate 0.3333 0.2059 0.1569
## Detection Prevalence 0.5588 0.2549 0.1863
## Balanced Accuracy 0.7825 0.7295 0.7880
# Additional model measures
diversity_epi <- mltest::ml_test(predicted = factor(diversity_tot_predict, levels = c("Low", "Medium", "High")),
true = factor(diversity_metab_labs, levels = c("Low", "Medium", "High")))
diversity_cm_names <- diversity_cm$table
colnames(diversity_cm_names) <- c("Actual\nLow", "Actual\nMedium", "Actual\nHigh")
rownames(diversity_cm_names) <- c("Predicted\nLow", "Predicted\nMedium", "Predicted\nHigh")
diversity_confusion_df <- diversity_cm_names %>%
t()
{
pdf(file = "./Results/Figure_6A.pdf", height = 10, width = 10)
plotIndiv(
diversity_train_splsda_final,
xlim = c(min(diversity_train_splsda_final$variates$X[,1])*1.05, max(diversity_train_splsda_final$variates$X[,1])*1.8),
ylim = c(min(diversity_train_splsda_final$variates$X[,2])*1.15, max(diversity_train_splsda_final$variates$X[,2])*1.8),
comp = c(1,2),
pch = 1,
ind.names = FALSE,
legend = FALSE,
background = diversity_train_background,
col = c("#3A001E", "#8A0246", "#C20463"),
star = TRUE,
point.lwd = 0.5,
title = "sPLS-DA:\nDiversity Groups",
style = "graphics",
legend.title = "Diversity Group",
X.label = paste0("Component 1 (", round(diversity_train_splsda_final$prop_expl_var$X[1] * 100), "%)"),
Y.label = paste0("Component 2 (", round(diversity_train_splsda_final$prop_expl_var$X[2] * 100), "%)")
)
addtable2plot(
y = -6.68,
x = min(diversity_train_splsda_final$variates$X[,1])*1.1,
diversity_confusion_df,
bty = "o",
display.rownames = TRUE,
hlines = TRUE,
vlines = TRUE,
cex = 0.75,
bg = "white"
)
text(
y = -4.75,
x = min(diversity_train_splsda_final$variates$X[,1])*1.1,
paste0("Overall ACC = ",
paste0(formatC(round(diversity_cm$overall[1], 3)*100, digits = 1, format = "f"), "%"), " [",
paste0(formatC(round(diversity_cm$overall[3], 3)*100, digits = 1, format = "f"), "%"),", ",
paste0(formatC(round(diversity_cm$overall[4], 3)*100, digits = 1, format = "f"), "%"),"]"),
cex = 0.75, adj = 0)
text(
y = 3,
x = 1.5,
paste0("Low Diversity ACC = ", round(diversity_epi$balanced.accuracy[1]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#3A001E")
text(
y = 2.7,
x = 1.5,
paste0("Low Diversity Sens. = ", round(diversity_epi$precision[1]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#3A001E")
text(
y = 2.4,
x = 1.5,
paste0("Low Diversity Spec. = ", round(diversity_epi$specificity[1]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#3A001E")
# text(
# y = 2.1,
# x = 1.5,
# paste0("Low Diversity OR = ", round(diversity_epi$DOR[1], 1)),
# cex = 0.75, adj = 0, col = "#3A001E")
text(
y = -5,
x = 1.5,
paste0("Medium Diversity ACC = ", round(diversity_epi$balanced.accuracy[2]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#8A0246")
text(
y = -5.3,
x = 1.5,
paste0("Medium Diversity Sens. = ", round(diversity_epi$precision[2]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#8A0246")
text(
y = -5.6,
x = 1.5,
paste0("Medium Diversity Spec. = ", round(diversity_epi$specificity[2]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#8A0246")
# text(
# y = -5.9,
# x = 1.5,
# paste0("Medium Diversity OR = ", round(diversity_epi$DOR[2], 1)),
# cex = 0.75, adj = 0, col = "#8A0246")
text(
y = 3,
x = min(diversity_train_splsda_final$variates$X[,1])*1.05,
paste0("High Diversity ACC = ", round(diversity_epi$balanced.accuracy[3]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#C20463")
text(
y = 2.7,
x = min(diversity_train_splsda_final$variates$X[,1])*1.05,
paste0("High Diversity Sens. = ", round(diversity_epi$precision[3]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#C20463")
text(
y = 2.4,
x = min(diversity_train_splsda_final$variates$X[,1])*1.05,
paste0("High Diversity Spec. = ", round(diversity_epi$specificity[3]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#C20463")
# text(
# y = 2.1,
# x = min(diversity_train_splsda_final$variates$X[,1])*1.05,
# paste0("High Diversity OR = ", round(diversity_epi$DOR[3], 1)),
# cex = 0.75, adj = 0, col = "#C20463")
invisible(dev.off())
}
plotIndiv(
diversity_train_splsda_final,
xlim = c(min(diversity_train_splsda_final$variates$X[,1])*1.05, max(diversity_train_splsda_final$variates$X[,1])*1.8),
ylim = c(min(diversity_train_splsda_final$variates$X[,2])*1.15, max(diversity_train_splsda_final$variates$X[,2])*1.8),
comp = c(1,2),
pch = 1,
ind.names = FALSE,
legend = FALSE,
background = diversity_train_background,
col = c("#3A001E", "#8A0246", "#C20463"),
star = TRUE,
point.lwd = 0.5,
title = "sPLS-DA:\nDiversity Groups",
style = "graphics",
legend.title = "Diversity Group",
X.label = paste0("Component 1 (", round(diversity_train_splsda_final$prop_expl_var$X[1] * 100), "%)"),
Y.label = paste0("Component 2 (", round(diversity_train_splsda_final$prop_expl_var$X[2] * 100), "%)")
)
addtable2plot(
y = -6.68,
x = min(diversity_train_splsda_final$variates$X[,1])*1.1,
diversity_confusion_df,
bty = "o",
display.rownames = TRUE,
hlines = TRUE,
vlines = TRUE,
cex = 0.75,
bg = "white"
)
text(
y = -4.75,
x = min(diversity_train_splsda_final$variates$X[,1])*1.1,
paste0("Overall ACC = ",
paste0(formatC(round(diversity_cm$overall[1], 3)*100, digits = 1, format = "f"), "%"), " [",
paste0(formatC(round(diversity_cm$overall[3], 3)*100, digits = 1, format = "f"), "%"),", ",
paste0(formatC(round(diversity_cm$overall[4], 3)*100, digits = 1, format = "f"), "%"),"]"),
cex = 0.75, adj = 0)
text(
y = 3,
x = 1.5,
paste0("Low Diversity ACC = ", round(diversity_epi$balanced.accuracy[1]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#3A001E")
text(
y = 2.7,
x = 1.5,
paste0("Low Diversity Sens. = ", round(diversity_epi$precision[1]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#3A001E")
text(
y = 2.4,
x = 1.5,
paste0("Low Diversity Spec. = ", round(diversity_epi$specificity[1]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#3A001E")
# text(
# y = 2.1,
# x = 1.5,
# paste0("Low Diversity OR = ", round(diversity_epi$DOR[1], 1)),
# cex = 0.75, adj = 0, col = "#3A001E")
text(
y = -5,
x = 1.5,
paste0("Medium Diversity ACC = ", round(diversity_epi$balanced.accuracy[2]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#8A0246")
text(
y = -5.3,
x = 1.5,
paste0("Medium Diversity Sens. = ", round(diversity_epi$precision[2]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#8A0246")
text(
y = -5.6,
x = 1.5,
paste0("Medium Diversity Spec. = ", round(diversity_epi$specificity[2]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#8A0246")
# text(
# y = -5.9,
# x = 1.5,
# paste0("Medium Diversity OR = ", round(diversity_epi$DOR[2], 1)),
# cex = 0.75, adj = 0, col = "#8A0246")
text(
y = 3,
x = min(diversity_train_splsda_final$variates$X[,1])*1.05,
paste0("High Diversity ACC = ", round(diversity_epi$balanced.accuracy[3]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#C20463")
text(
y = 2.7,
x = min(diversity_train_splsda_final$variates$X[,1])*1.05,
paste0("High Diversity Sens. = ", round(diversity_epi$precision[3]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#C20463")
text(
y = 2.4,
x = min(diversity_train_splsda_final$variates$X[,1])*1.05,
paste0("High Diversity Spec. = ", round(diversity_epi$specificity[3]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#C20463")# text(
# y = 2.1,
# x = min(diversity_train_splsda_final$variates$X[,1])*1.05,
# paste0("High Diversity OR = ", round(diversity_epi$DOR[3], 1)),
# cex = 0.75, adj = 0, col = "#C20463")diversity_healthy_metab_mat <-
metab_qual_anon %>% filter(sampleID %in% first_samps$sampleID|grepl(patientID, pattern = "ht")) %>%
mutate(compound = ifelse(compound == "isovaleric-acid", "isovalerate", compound),
compound = str_to_title(compound)) %>%
filter(compound %in% heatmap_cmpds$compound|is.na(compound)) %>%
ungroup() %>%
left_join(metaphlan_df_sumry %>% select(sampleID, diversity_group_abv)) %>%
group_by(sampleID, compound, diversity_group_abv) %>%
summarise(mvalue = mean(mvalue, na.rm = TRUE)) %>%
ungroup() %>%
mutate_all(~replace(., is.nan(.), NA)) %>%
select(sampleID, compound, mvalue, diversity_group_abv) %>%
drop_na(sampleID) %>%
pivot_wider(names_from = "compound", values_from = "mvalue") %>%
filter(sampleID != "") %>%
column_to_rownames(var = "sampleID") %>%
select(-`NA`, - diversity_group_abv) %>%
filter_all(any_vars(!is.na(.)))
diversity_healthy_metab_labs <-
diversity_healthy_metab_mat %>%
rownames_to_column(var = "sampleID") %>%
left_join(metaphlan_df_sumry %>% select(sampleID, diversity_group_abv)) %>%
droplevels() %>%
pull(diversity_group_abv)
dim(diversity_healthy_metab_mat) #123 93 (means 93 compounds and 102 LT patients/infections)## [1] 123 93
length(diversity_healthy_metab_labs) #123 (means 102 LT patients/infections)## [1] 123
# Begin model training
set.seed(1234)
diversity_healthy_train <- sample(1:nrow(diversity_healthy_metab_mat), as.integer(0.7*nrow(diversity_healthy_metab_mat))) # randomly select 70% samples in training
diversity_healthy_test <- setdiff(1:nrow(diversity_healthy_metab_mat), diversity_healthy_train) # rest is part of the test set
# store matrices into training and test set:
diversity_healthy_metab_mat.train <- diversity_healthy_metab_mat[diversity_healthy_train, ]
diversity_healthy_metab_mat.test <- diversity_healthy_metab_mat[diversity_healthy_test,]
diversity_healthy_metab_labs.train <- diversity_healthy_metab_labs[diversity_healthy_train]
diversity_healthy_metab_labs.test <- diversity_healthy_metab_labs[diversity_healthy_test]
# Train the model to tune hyperparameters
# Initial model to find optimal number of components to include
set.seed(1234)
diversity_healthy_train_splsda <- mixOmics::splsda(diversity_healthy_metab_mat.train, diversity_healthy_metab_labs.train, ncomp = 5)
# Performance assessment
## 5-fold, 100-repeat cross validation
set.seed(1234)
diversity_healthy_train_plsda_perf <-
perf(
diversity_healthy_train_splsda,
validation = "Mfold",
folds = 5,
progressBar = FALSE,
auc = TRUE,
nrepeat = 50
)
plot(
diversity_healthy_train_plsda_perf,
col = color.mixo(5:7),
sd = FALSE,
auc = TRUE,
legend.position = "horizontal"
) # ncomp = 4 might be best for classification error rate and max.dist# Number of optimal variables to select for each component
diversity_healthy_train_keepX <- c(1:10, seq(20, 130, 10))
set.seed(123)
diversity_healthy_train_tune_splsda <-
mixOmics::tune.splsda(
diversity_healthy_metab_mat.train,
diversity_healthy_metab_labs.train,
ncomp = 4, # Choose 4 components (max) to be safe
validation = 'Mfold',
folds = 5,
dist = 'max.dist',
progressBar = FALSE,
auc = TRUE,
measure = "BER",
test.keepX = diversity_healthy_train_keepX,
nrepeat = 50
)
plot(diversity_healthy_train_tune_splsda, col = color.jet(4))diversity_healthy_train_error <- diversity_healthy_train_tune_splsda$error.rate
diversity_healthy_train_ncomp <- diversity_healthy_train_tune_splsda$choice.ncomp$ncomp # optimal number of components based on t-tests on the error rate
diversity_healthy_train_ncomp #3 components are optimal## [1] 3
diversity_healthy_train_select_keepX <- diversity_healthy_train_tune_splsda$choice.keepX[1:diversity_healthy_train_ncomp] # optimal number of variables to select per component
diversity_healthy_train_select_keepX## comp1 comp2 comp3
## 70 90 90
# Final Model
diversity_healthy_train_splsda_final <-
mixOmics::splsda(diversity_healthy_metab_mat.train, diversity_healthy_metab_labs.train, ncomp = diversity_healthy_train_ncomp, keepX = diversity_healthy_train_select_keepX)
# Test the model
diversity_healthy_predict_train_splsda_final <- predict(diversity_healthy_train_splsda_final, diversity_healthy_metab_mat.test,
dist = "max.dist")
diversity_healthy_predict_train_comp2 <- diversity_healthy_predict_train_splsda_final$class$max.dist[,diversity_healthy_train_ncomp]
diversity_healthy_union <- union(diversity_healthy_predict_train_comp2, diversity_healthy_metab_labs.test)
confusionMatrix(table(factor(diversity_healthy_predict_train_comp2, diversity_healthy_union,
levels = c("Low", "Medium", "High", "Healthy Donor")),
factor(diversity_healthy_metab_labs.test, diversity_healthy_union,
levels = c("Low", "Medium", "High", "Healthy Donor"))))## Confusion Matrix and Statistics
##
##
## Healthy Donor Low High Medium
## Healthy Donor 10 6 1 0
## Low 1 6 0 0
## High 0 1 4 0
## Medium 0 1 3 4
##
## Overall Statistics
##
## Accuracy : 0.6486
## 95% CI : (0.4746, 0.7979)
## No Information Rate : 0.3784
## P-Value [Acc > NIR] : 0.0007838
##
## Kappa : 0.5247
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Healthy Donor Class: Low Class: High Class: Medium
## Sensitivity 0.9091 0.4286 0.5000 1.0000
## Specificity 0.7308 0.9565 0.9655 0.8788
## Pos Pred Value 0.5882 0.8571 0.8000 0.5000
## Neg Pred Value 0.9500 0.7333 0.8750 1.0000
## Prevalence 0.2973 0.3784 0.2162 0.1081
## Detection Rate 0.2703 0.1622 0.1081 0.1081
## Detection Prevalence 0.4595 0.1892 0.1351 0.2162
## Balanced Accuracy 0.8199 0.6925 0.7328 0.9394
diversity_healthy_train_background <- background.predict(diversity_healthy_train_splsda_final,
comp.predicted = 2,
xlim = c(-20,20),
ylim = c(-20,20),
dist = "centroids.dist")
# Model metrics for all samples
diversity_healthy_tot <- predict(diversity_healthy_train_splsda_final,
diversity_healthy_metab_mat,
dist = "max.dist")
diversity_healthy_tot_predict <- diversity_healthy_tot$class$max.dist[,diversity_healthy_train_ncomp]
diversity_healthy_tot_union <- union(diversity_healthy_tot_predict, diversity_healthy_metab_labs)
diversity_healthy_cm <- confusionMatrix(table(factor(diversity_healthy_tot_predict, diversity_healthy_tot_union,
levels = c("Low", "Medium", "High", "Healthy Donor")),
factor(diversity_healthy_metab_labs, diversity_healthy_tot_union,
levels = c("Low", "Medium", "High", "Healthy Donor"))),
)
diversity_healthy_cm## Confusion Matrix and Statistics
##
##
## Healthy Donor Medium Low High
## Healthy Donor 34 14 5 0
## Medium 2 23 2 0
## Low 1 1 14 0
## High 0 1 5 21
##
## Overall Statistics
##
## Accuracy : 0.748
## 95% CI : (0.6617, 0.8219)
## No Information Rate : 0.3171
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.6575
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Healthy Donor Class: Medium Class: Low Class: High
## Sensitivity 0.9189 0.5897 0.5385 1.0000
## Specificity 0.7791 0.9524 0.9794 0.9412
## Pos Pred Value 0.6415 0.8519 0.8750 0.7778
## Neg Pred Value 0.9571 0.8333 0.8879 1.0000
## Prevalence 0.3008 0.3171 0.2114 0.1707
## Detection Rate 0.2764 0.1870 0.1138 0.1707
## Detection Prevalence 0.4309 0.2195 0.1301 0.2195
## Balanced Accuracy 0.8490 0.7711 0.7589 0.9706
# Additional model measures
diversity_healthy_epi <- mltest::ml_test(predicted = factor(diversity_healthy_tot_predict, levels = c("Low", "Medium", "High", "Healthy Donor")),
true = factor(diversity_healthy_metab_labs, levels = c("Low", "Medium", "High", "Healthy Donor")))
diversity_healthy_cm_names <- diversity_healthy_cm$table
colnames(diversity_healthy_cm_names) <- c("Actual\nLow", "Actual\nMedium", "Actual\nHigh", "Actual\nHealthy Donor")
rownames(diversity_healthy_cm_names) <- c("Predicted\nLow", "Predicted\nMedium", "Predicted\nHigh", "Predicted\nHealthy Donor")
diversity_healthy_confusion_df <- diversity_healthy_cm_names %>%
t()
{
pdf(file = "./Results/Potential_Figure_6A.pdf", height = 10, width = 10)
plotIndiv(
diversity_healthy_train_splsda_final,
xlim = c(min(diversity_healthy_train_splsda_final$variates$X[,1])*1.75, max(diversity_healthy_train_splsda_final$variates$X[,1])*1.15),
ylim = c(min(diversity_healthy_train_splsda_final$variates$X[,2])*1.25, max(diversity_healthy_train_splsda_final$variates$X[,2])*1.25),
comp = c(1,2),
pch = 1,
ind.names = FALSE,
legend = FALSE,
background = diversity_healthy_train_background,
col = c("#3A001E", "#8A0246", "#C20463", "#1A49BE"),
star = TRUE,
point.lwd = 0.5,
title = "sPLS-DA:\nDiversity Groups",
style = "graphics",
legend.title = "Diversity Group",
X.label = paste0("Component 1 (", round(diversity_healthy_train_splsda_final$prop_expl_var$X[1] * 100), "%)"),
Y.label = paste0("Component 2 (", round(diversity_healthy_train_splsda_final$prop_expl_var$X[2] * 100), "%)")
)
addtable2plot(
y = 7.35,
x = 6.30,
diversity_healthy_confusion_df,
bty = "o",
display.rownames = TRUE,
hlines = TRUE,
vlines = TRUE,
cex = 0.5,
bg = "white"
)
text(
y = 7,
x = 7.75,
paste0("Overall ACC = ",
paste0(formatC(round(diversity_healthy_cm$overall[1], 3)*100, digits = 1, format = "f"), "%"), " [",
paste0(formatC(round(diversity_healthy_cm$overall[3], 3)*100, digits = 1, format = "f"), "%"),", ",
paste0(formatC(round(diversity_healthy_cm$overall[4], 3)*100, digits = 1, format = "f"), "%"),"]"),
cex = 0.5, adj = 0)
text(
y = -3.5,
x = -6,
paste0("Low Diversity ACC = ", round(diversity_healthy_epi$balanced.accuracy[1]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#3A001E")
text(
y = -3.8,
x = -6,
paste0("Low Diversity Sens. = ", round(diversity_healthy_epi$precision[1]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#3A001E")
text(
y = -4.1,
x = -6,
paste0("Low Diversity Spec. = ", round(diversity_healthy_epi$specificity[1]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#3A001E")
# text(
# y = -4.4,
# x = -6,
# paste0("Low Diversity OR = ", round(diversity_healthy_epi$DOR[1], 1)),
# cex = 0.75, adj = 0, col = "#3A001E")
text(
y = 3.8,
x = -7,
paste0("Medium Diversity ACC = ", round(diversity_healthy_epi$balanced.accuracy[2]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#8A0246")
text(
y = 3.5,
x = -7,
paste0("Medium Diversity Sens. = ", round(diversity_healthy_epi$precision[2]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#8A0246")
text(
y = 3.2,
x = -7,
paste0("Medium Diversity Spec. = ", round(diversity_healthy_epi$specificity[2]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#8A0246")
# text(
# y = 2.9,
# x = -7,
# paste0("Medium Diversity OR = ", round(diversity_healthy_epi$DOR[2], 1)),
# cex = 0.75, adj = 0, col = "#8A0246")
text(
y = 6.5,
x = 2,
paste0("High Diversity ACC = ", round(diversity_healthy_epi$balanced.accuracy[3]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#C20463")
text(
y = 6.2,
x = 2,
paste0("High Diversity Sens. = ", round(diversity_healthy_epi$precision[3]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#C20463")
text(
y = 5.9,
x = 2,
paste0("High Diversity Spec. = ", round(diversity_healthy_epi$specificity[3]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#C20463")
# text(
# y = 5.6,
# x = 2,
# paste0("High Diversity OR = ", round(diversity_healthy_epi$DOR[3], 1)),
# cex = 0.75, adj = 0, col = "#C20463")
text(
y = -3.5,
x = 3,
paste0("Healthy Donor ACC = ", round(diversity_healthy_epi$balanced.accuracy[4]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#1A49BE")
text(
y = -3.8,
x = 3,
paste0("Healthy Donor Sens. = ", round(diversity_healthy_epi$precision[4]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#1A49BE")
text(
y = -4.1,
x = 3,
paste0("Healthy Donor Spec. = ", round(diversity_healthy_epi$specificity[4]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#1A49BE")
# text(
# y = -4.4,
# x = 3,
# paste0("Healthy Donor OR = ", round(diversity_healthy_epi$DOR[4], 1)),
# cex = 0.75, adj = 0, col = "#1A49BE")
invisible(dev.off())
}
plotIndiv(
diversity_healthy_train_splsda_final,
xlim = c(min(diversity_healthy_train_splsda_final$variates$X[,1])*1.75, max(diversity_healthy_train_splsda_final$variates$X[,1])*1.15),
ylim = c(min(diversity_healthy_train_splsda_final$variates$X[,2])*1.25, max(diversity_healthy_train_splsda_final$variates$X[,2])*1.25),
comp = c(1,2),
pch = 1,
ind.names = FALSE,
legend = FALSE,
background = diversity_healthy_train_background,
col = c("#3A001E", "#8A0246", "#C20463", "#1A49BE"),
star = TRUE,
point.lwd = 0.5,
title = "sPLS-DA:\nDiversity Groups",
style = "graphics",
legend.title = "Diversity Group",
X.label = paste0("Component 1 (", round(diversity_healthy_train_splsda_final$prop_expl_var$X[1] * 100), "%)"),
Y.label = paste0("Component 2 (", round(diversity_healthy_train_splsda_final$prop_expl_var$X[2] * 100), "%)")
)
addtable2plot(
y = 7.35,
x = 6.30,
diversity_healthy_confusion_df,
bty = "o",
display.rownames = TRUE,
hlines = TRUE,
vlines = TRUE,
cex = 0.5,
bg = "white"
)
text(
y = 7,
x = 7.75,
paste0("Overall ACC = ",
paste0(formatC(round(diversity_healthy_cm$overall[1], 3)*100, digits = 1, format = "f"), "%"), " [",
paste0(formatC(round(diversity_healthy_cm$overall[3], 3)*100, digits = 1, format = "f"), "%"),", ",
paste0(formatC(round(diversity_healthy_cm$overall[4], 3)*100, digits = 1, format = "f"), "%"),"]"),
cex = 0.5, adj = 0)
text(
y = -3.5,
x = -6,
paste0("Low Diversity ACC = ", round(diversity_healthy_epi$balanced.accuracy[1]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#3A001E")
text(
y = -3.8,
x = -6,
paste0("Low Diversity Sens. = ", round(diversity_healthy_epi$precision[1]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#3A001E")
text(
y = -4.1,
x = -6,
paste0("Low Diversity Spec. = ", round(diversity_healthy_epi$specificity[1]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#3A001E")
# text(
# y = -4.4,
# x = -6,
# paste0("Low Diversity OR = ", round(diversity_healthy_epi$DOR[1], 1)),
# cex = 0.75, adj = 0, col = "#3A001E")
text(
y = 3.8,
x = -7,
paste0("Medium Diversity ACC = ", round(diversity_healthy_epi$balanced.accuracy[2]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#8A0246")
text(
y = 3.5,
x = -7,
paste0("Medium Diversity Sens. = ", round(diversity_healthy_epi$precision[2]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#8A0246")
text(
y = 3.2,
x = -7,
paste0("Medium Diversity Spec. = ", round(diversity_healthy_epi$specificity[2]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#8A0246")
# text(
# y = 2.9,
# x = -7,
# paste0("Medium Diversity OR = ", round(diversity_healthy_epi$DOR[2], 1)),
# cex = 0.75, adj = 0, col = "#8A0246")
text(
y = 6.5,
x = 2,
paste0("High Diversity ACC = ", round(diversity_healthy_epi$balanced.accuracy[3]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#C20463")
text(
y = 6.2,
x = 2,
paste0("High Diversity Sens. = ", round(diversity_healthy_epi$precision[3]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#C20463")
text(
y = 5.9,
x = 2,
paste0("High Diversity Spec. = ", round(diversity_healthy_epi$specificity[3]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#C20463")
# text(
# y = 5.6,
# x = 2,
# paste0("High Diversity OR = ", round(diversity_healthy_epi$DOR[3], 1)),
# cex = 0.75, adj = 0, col = "#C20463")
text(
y = -3.5,
x = 3,
paste0("Healthy Donor ACC = ", round(diversity_healthy_epi$balanced.accuracy[4]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#1A49BE")
text(
y = -3.8,
x = 3,
paste0("Healthy Donor Sens. = ", round(diversity_healthy_epi$precision[4]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#1A49BE")
text(
y = -4.1,
x = 3,
paste0("Healthy Donor Spec. = ", round(diversity_healthy_epi$specificity[4]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#1A49BE")# text(
# y = -4.4,
# x = 3,
# paste0("Healthy Donor OR = ", round(diversity_healthy_epi$DOR[4], 1)),
# cex = 0.75, adj = 0, col = "#1A49BE")infx_metab_mat <-
metab_qual_anon %>%
mutate(compound = ifelse(compound == "isovaleric-acid", "isovalerate", compound),
compound = str_to_title(compound)) %>%
filter(compound %in% heatmap_cmpds$compound|is.na(compound)) %>%
ungroup() %>%
right_join(peri_matrix_all %>% select(sampleID, bact_infection_present)) %>%
group_by(sampleID, compound, bact_infection_present) %>%
summarise(mvalue = mean(mvalue, na.rm = TRUE)) %>%
ungroup() %>%
mutate_all(~replace(., is.nan(.), NA)) %>%
select(sampleID, compound, mvalue, bact_infection_present) %>%
drop_na(sampleID) %>%
pivot_wider(names_from = "compound", values_from = "mvalue") %>%
filter(sampleID != "") %>%
mutate(bact_infection_present = ifelse(grepl(x = bact_infection_present, pattern = "No"), "No Infection", "Infection")) %>%
select(-bact_infection_present) %>%
column_to_rownames(var = "sampleID") %>%
select(-`NA`) %>%
filter_all(any_vars(!is.na(.)))
infx_metab_labs <-
infx_metab_mat %>%
rownames_to_column(var = "sampleID") %>%
left_join(peri_matrix_all %>% select(sampleID, bact_infection_present)) %>%
mutate(bact_infection_present = ifelse(grepl(x = bact_infection_present, pattern = "No"), "No Infection", "Infection")) %>%
pull(bact_infection_present)
dim(infx_metab_mat) #108 93 (means 93 compounds and 108 LT patients/infections)## [1] 108 93
length(infx_metab_labs) #108 (means 108 LT patients/infections)## [1] 108
# Begin model training
set.seed(1234)
infx_train <- sample(1:nrow(infx_metab_mat), as.integer(0.7*nrow(infx_metab_mat))) # randomly select 70% samples in training
infx_test <- setdiff(1:nrow(infx_metab_mat), infx_train) # rest is part of the test set
# store matrices into training and test set:
infx_metab_mat.train <- infx_metab_mat[infx_train, ]
infx_metab_mat.test <- infx_metab_mat[infx_test,]
infx_metab_labs.train <- infx_metab_labs[infx_train]
infx_metab_labs.test <- infx_metab_labs[infx_test]
# Train the model to tune hyperparameters
# Initial model to find optimal number of components to include
set.seed(1234)
infx_train_splsda <- mixOmics::splsda(infx_metab_mat.train, infx_metab_labs.train, ncomp = 5)
# Performance assessment
## 5-fold, 100-repeat cross validation
set.seed(1234)
infx_train_plsda_perf <-
perf(
infx_train_splsda,
validation = "Mfold",
folds = 5,
progressBar = FALSE,
auc = TRUE,
nrepeat = 50
)
plot(
infx_train_plsda_perf,
col = color.mixo(5:7),
sd = FALSE,
auc = TRUE,
legend.position = "horizontal"
) # ncomp = 1 is best for classification error rate and max.dist# Number of optimal variables to select for each component
infx_train_keepX <- c(1:10, seq(20, 108, 10))
set.seed(123)
infx_train_tune_splsda <-
mixOmics::tune.splsda(
infx_metab_mat.train,
infx_metab_labs.train,
ncomp = 3, # Choose 3 components (max) to be safe
validation = 'Mfold',
folds = 5,
dist = 'max.dist',
progressBar = FALSE,
auc = TRUE,
measure = "BER",
test.keepX = infx_train_keepX,
nrepeat = 50
)
plot(infx_train_tune_splsda, col = color.jet(3))infx_train_error <- infx_train_tune_splsda$error.rate
infx_train_ncomp <- infx_train_tune_splsda$choice.ncomp$ncomp # optimal number of components based on t-tests on the error rate
infx_train_ncomp #1 component is optimal## [1] 1
infx_train_select_keepX <- infx_train_tune_splsda$choice.keepX[1:(infx_train_ncomp + 1)] # optimal number of variables to select per component
infx_train_select_keepX## comp1 comp2
## 90 80
# Final Model
infx_train_splsda_final <-
mixOmics::splsda(infx_metab_mat.train, infx_metab_labs.train, ncomp = (infx_train_ncomp + 1), keepX = infx_train_select_keepX)
# Test the model
infx_predict_train_splsda_final <- predict(infx_train_splsda_final, infx_metab_mat.test,
dist = "max.dist")
infx_predict_train_comp2 <- infx_predict_train_splsda_final$class$max.dist[,(infx_train_ncomp + 1)]
infx_union <- union(infx_predict_train_comp2, infx_metab_labs.test)
confusionMatrix(table(factor(infx_predict_train_comp2, infx_union),
factor(infx_metab_labs.test, infx_union)),
positive = "Infection")## Confusion Matrix and Statistics
##
##
## No Infection Infection
## No Infection 20 5
## Infection 7 1
##
## Accuracy : 0.6364
## 95% CI : (0.4512, 0.796)
## No Information Rate : 0.8182
## P-Value [Acc > NIR] : 0.9965
##
## Kappa : -0.082
##
## Mcnemar's Test P-Value : 0.7728
##
## Sensitivity : 0.1667
## Specificity : 0.7407
## Pos Pred Value : 0.1250
## Neg Pred Value : 0.8000
## Prevalence : 0.1818
## Detection Rate : 0.0303
## Detection Prevalence : 0.2424
## Balanced Accuracy : 0.4537
##
## 'Positive' Class : Infection
##
infx_train_background <- background.predict(infx_train_splsda_final,
comp.predicted = 2,
xlim = c(-20,20),
ylim = c(-20,20),
dist = "centroids.dist")
# Model metrics for all samples
infx_tot <- predict(infx_train_splsda_final,
infx_metab_mat,
dist = "max.dist")
infx_tot_predict <- infx_tot$class$max.dist[,(infx_train_ncomp + 1)]
infx_tot_union <- union(infx_tot_predict, infx_metab_labs)
infx_cm <- confusionMatrix(table(factor(infx_tot_predict, infx_tot_union,
levels = c("Infection", "No Infection")),
factor(infx_metab_labs, infx_tot_union,
levels = c("Infection", "No Infection"))),
positive = "Infection")
infx_cm## Confusion Matrix and Statistics
##
##
## No Infection Infection
## No Infection 17 9
## Infection 10 72
##
## Accuracy : 0.8241
## 95% CI : (0.739, 0.8906)
## No Information Rate : 0.75
## P-Value [Acc > NIR] : 0.04398
##
## Kappa : 0.525
##
## Mcnemar's Test P-Value : 1.00000
##
## Sensitivity : 0.8889
## Specificity : 0.6296
## Pos Pred Value : 0.8780
## Neg Pred Value : 0.6538
## Prevalence : 0.7500
## Detection Rate : 0.6667
## Detection Prevalence : 0.7593
## Balanced Accuracy : 0.7593
##
## 'Positive' Class : Infection
##
# Additional model measures
infx_epi <- epiR::epi.tests(table(infx_tot_predict, infx_metab_labs), conf.level = 0.95)
infx_confusion_df <- infx_epi$tab %>%
t() %>%
as.data.frame() %>%
rownames_to_column(var = "actual") %>%
mutate(actual = case_when(grepl(actual, pattern = "+", fixed = TRUE) ~ "Actual\nInfection",
grepl(actual, pattern = "-", fixed = TRUE) ~ "Actual\nNo Infection",
TRUE ~ "Total")) %>%
dplyr::rename("Predicted\nInfection" = "Test +",
"Predicted\nNo Infection" = "Test -") %>%
column_to_rownames(var = "actual")
{
pdf(file = "./Results/Figure_6B.pdf", height = 10, width = 10)
plotIndiv(
infx_train_splsda_final,
comp = c(1,2),
pch = 1,
ind.names = FALSE,
legend = FALSE,
background = infx_train_background,
col = c("goldenrod", "gray87"),
star = TRUE,
point.lwd = 0.5,
title = "sPLS-DA:\nAny Infection vs No Infection",
style = "graphics",
legend.title = "Infection Group",
X.label = paste0("Component 1 (", round(infx_train_splsda_final$prop_expl_var$X[1] * 100), "%)"),
Y.label = paste0("Component 2 (", round(infx_train_splsda_final$prop_expl_var$X[2] * 100), "%)")
)
addtable2plot(
y = max(infx_train_splsda_final$variates$X[,2])*0.1,
x = min(infx_train_splsda_final$variates$X[,1]),
infx_confusion_df,
bty = "o",
display.rownames = TRUE,
hlines = TRUE,
vlines = TRUE,
cex = 0.75,
bg = "white"
)
text(
y = -4,
x = -2.75,
substitute(bold("Infection")), cex = 1.75, adj = 0, col = "goldenrod")
text(
y = 4,
x = -1,
substitute(bold("No Infection")), cex = 1.75, adj = 0, col = "gray70")
text(
y = max(infx_train_splsda_final$variates$X[,2])*1,
x = min(infx_train_splsda_final$variates$X[,1]),
paste0("ACC = ",
round(infx_cm$overall[1]*100, 1), "% [",
round(infx_cm$overall[3]*100, 1), "%, ",
round(infx_cm$overall[4]*100, 1), "%]"),
cex = 0.75, adj = 0)
text(
y = max(infx_train_splsda_final$variates$X[,2])*0.9,
x = min(infx_train_splsda_final$variates$X[,1]),
paste0("Sens. = ",
paste0(formatC(round(infx_epi$detail[2][3,], 3)*100, digits = 1, format = "f"), "%"), " [",
paste0(formatC(round(infx_epi$detail[3][3,], 3)*100, digits = 1, format = "f"), "%"),", ",
paste0(formatC(round(infx_epi$detail[4][3,], 3)*100, digits = 1, format = "f"), "%"),"]"),
cex = 0.75, adj = 0)
text(
y = max(infx_train_splsda_final$variates$X[,2])*0.8,
x = min(infx_train_splsda_final$variates$X[,1]),
paste0("Spec. = ",
paste0(formatC(round(infx_epi$detail[2][4,], 3)*100, digits = 1, format = "f"), "%"), " [",
paste0(formatC(round(infx_epi$detail[3][4,], 3)*100, digits = 1, format = "f"), "%"),", ",
paste0(formatC(round(infx_epi$detail[4][4,], 3)*100, digits = 1, format = "f"), "%"),"]"),
cex = 0.75, adj = 0)
text(
y = max(infx_train_splsda_final$variates$X[,2])*0.7,
x = min(infx_train_splsda_final$variates$X[,1]),
paste0("OR = ",
formatC(round(infx_epi$detail[2][6,], 3), digits = 1, format = "f"), " [",
formatC(round(infx_epi$detail[3][6,], 3), digits = 1, format = "f"), ", ",
formatC(round(infx_epi$detail[4][6,], 3), digits = 1, format = "f"), "]"),
cex = 0.75, adj = 0)
invisible(dev.off())
}
plotIndiv(
infx_train_splsda_final,
comp = c(1,2),
pch = 1,
ind.names = FALSE,
legend = FALSE,
background = infx_train_background,
col = c("goldenrod", "gray87"),
star = TRUE,
point.lwd = 0.5,
title = "sPLS-DA:\nAny Infection vs No Infection",
style = "graphics",
legend.title = "Infection Group",
X.label = paste0("Component 1 (", round(infx_train_splsda_final$prop_expl_var$X[1] * 100), "%)"),
Y.label = paste0("Component 2 (", round(infx_train_splsda_final$prop_expl_var$X[2] * 100), "%)")
)
addtable2plot(
y = max(infx_train_splsda_final$variates$X[,2])*0.1,
x = min(infx_train_splsda_final$variates$X[,1]),
infx_confusion_df,
bty = "o",
display.rownames = TRUE,
hlines = TRUE,
vlines = TRUE,
cex = 0.75,
bg = "white"
)
text(
y = -4,
x = -2.75,
substitute(bold("Infection")), cex = 1.75, adj = 0, col = "goldenrod")
text(
y = 4,
x = -1,
substitute(bold("No Infection")), cex = 1.75, adj = 0, col = "gray70")
text(
y = max(infx_train_splsda_final$variates$X[,2])*1,
x = min(infx_train_splsda_final$variates$X[,1]),
paste0("ACC = ",
round(infx_cm$overall[1]*100, 1), "% [",
round(infx_cm$overall[3]*100, 1), "%, ",
round(infx_cm$overall[4]*100, 1), "%]"),
cex = 0.75, adj = 0)
text(
y = max(infx_train_splsda_final$variates$X[,2])*0.9,
x = min(infx_train_splsda_final$variates$X[,1]),
paste0("Sens. = ",
paste0(formatC(round(infx_epi$detail[2][3,], 3)*100, digits = 1, format = "f"), "%"), " [",
paste0(formatC(round(infx_epi$detail[3][3,], 3)*100, digits = 1, format = "f"), "%"),", ",
paste0(formatC(round(infx_epi$detail[4][3,], 3)*100, digits = 1, format = "f"), "%"),"]"),
cex = 0.75, adj = 0)
text(
y = max(infx_train_splsda_final$variates$X[,2])*0.8,
x = min(infx_train_splsda_final$variates$X[,1]),
paste0("Spec. = ",
paste0(formatC(round(infx_epi$detail[2][4,], 3)*100, digits = 1, format = "f"), "%"), " [",
paste0(formatC(round(infx_epi$detail[3][4,], 3)*100, digits = 1, format = "f"), "%"),", ",
paste0(formatC(round(infx_epi$detail[4][4,], 3)*100, digits = 1, format = "f"), "%"),"]"),
cex = 0.75, adj = 0)
text(
y = max(infx_train_splsda_final$variates$X[,2])*0.7,
x = min(infx_train_splsda_final$variates$X[,1]),
paste0("OR = ",
formatC(round(infx_epi$detail[2][6,], 3), digits = 1, format = "f"), " [",
formatC(round(infx_epi$detail[3][6,], 3), digits = 1, format = "f"), ", ",
formatC(round(infx_epi$detail[4][6,], 3), digits = 1, format = "f"), "]"),
cex = 0.75, adj = 0){
pdf(file = "./Results/Figure_6.pdf", height = 8, width = 22)
par(mfrow = c(1, 2))
# Figure 6A
plotIndiv(diversity_train_splsda_final, xlim = c(min(diversity_train_splsda_final$variates$X[,
1]) * 1.15, max(diversity_train_splsda_final$variates$X[,
1])), comp = c(1, 2), pch = 1, ind.names = FALSE, legend = TRUE,
background = diversity_train_background, col = c("#3A001E",
"#8A0246", "#C20463"), star = TRUE, point.lwd = 0.5,
title = "sPLS-DA:\nDiversity Groups", style = "graphics",
legend.title = "Diversity Group", X.label = paste0("Component 1 (",
round(diversity_train_splsda_final$prop_expl_var$X[1] *
100), "%)"), Y.label = paste0("Component 2 (",
round(diversity_train_splsda_final$prop_expl_var$X[2] *
100), "%)"))
addtable2plot(y = -2.5, x = min(diversity_train_splsda_final$variates$X[,
1]) * 1.2, diversity_confusion_df, bty = "o", display.rownames = TRUE,
hlines = TRUE, vlines = TRUE, cex = 0.75)
text(y = -3, x = min(diversity_train_splsda_final$variates$X[,
1]) * 1.15, paste0("Low Div. Accuracy = ", round(diversity_epi$balanced.accuracy[1] *
100, 1), "%"), cex = 0.75, adj = 0)
text(y = -3.2, x = min(diversity_train_splsda_final$variates$X[,
1]) * 1.15, paste0("Low Div. Sensitivity = ", round(diversity_epi$precision[1] *
100, 1), "%"), cex = 0.75, adj = 0)
text(y = -3.4, x = min(diversity_train_splsda_final$variates$X[,
1]) * 1.15, paste0("Low Div. Specificity = ", round(diversity_epi$specificity[1] *
100, 1), "%"), cex = 0.75, adj = 0)
text(y = -3.6, x = min(diversity_train_splsda_final$variates$X[,
1]) * 1.15, paste0("Low Div. Odds Ratio = ", round(diversity_epi$DOR[1],
1)), cex = 0.75, adj = 0)
text(y = -4, x = min(diversity_train_splsda_final$variates$X[,
1]) * 1.15, paste0("Medium Div. Accuracy = ", round(diversity_epi$balanced.accuracy[2] *
100, 1), "%"), cex = 0.75, adj = 0)
text(y = -4.2, x = min(diversity_train_splsda_final$variates$X[,
1]) * 1.15, paste0("Medium Div. Sensitivity = ", round(diversity_epi$precision[2] *
100, 1), "%"), cex = 0.75, adj = 0)
text(y = -4.4, x = min(diversity_train_splsda_final$variates$X[,
1]) * 1.15, paste0("Medium Div. Specificity = ", round(diversity_epi$specificity[2] *
100, 1), "%"), cex = 0.75, adj = 0)
text(y = -4.6, x = min(diversity_train_splsda_final$variates$X[,
1]) * 1.15, paste0("Medium Div. Odds Ratio = ", round(diversity_epi$DOR[2],
1)), cex = 0.75, adj = 0)
text(y = -5, x = min(diversity_train_splsda_final$variates$X[,
1]) * 1.15, paste0("High Div. Accuracy = ", round(diversity_epi$balanced.accuracy[3] *
100, 1), "%"), cex = 0.75, adj = 0)
text(y = -5.2, x = min(diversity_train_splsda_final$variates$X[,
1]) * 1.15, paste0("High Div. Sensitivity = ", round(diversity_epi$precision[3] *
100, 1), "%"), cex = 0.75, adj = 0)
text(y = -5.4, x = min(diversity_train_splsda_final$variates$X[,
1]) * 1.15, paste0("High Div. Specificity = ", round(diversity_epi$specificity[3] *
100, 1), "%"), cex = 0.75, adj = 0)
text(y = -5.6, x = min(diversity_train_splsda_final$variates$X[,
1]) * 1.15, paste0("High Div. Odds Ratio = ", round(diversity_epi$DOR[3],
1)), cex = 0.75, adj = 0)
# Figure 6B
plotIndiv(infx_train_splsda_final, comp = c(1, 2), pch = 1,
ind.names = FALSE, legend = TRUE, background = infx_train_background,
col = c("goldenrod", "gray87"), star = TRUE, point.lwd = 0.5,
title = "sPLS-DA:\nAny Infection vs No Infection", style = "graphics",
legend.title = "Infection Group", X.label = paste0("Component 1 (",
round(infx_train_splsda_final$prop_expl_var$X[1] *
100), "%)"), Y.label = paste0("Component 2 (",
round(infx_train_splsda_final$prop_expl_var$X[2] *
100), "%)"))
addtable2plot(y = max(infx_train_splsda_final$variates$X[,
2]) * 0.8, x = min(infx_train_splsda_final$variates$X[,
1]), infx_confusion_df, bty = "o", display.rownames = TRUE,
hlines = TRUE, vlines = TRUE, cex = 0.75)
text(y = max(infx_train_splsda_final$variates$X[, 2]) * 0.6,
x = min(infx_train_splsda_final$variates$X[, 1]), paste0("Accuracy = ",
round(infx_cm$overall[1] * 100, 1), "% [", round(infx_cm$overall[3] *
100, 1), "%, ", round(infx_cm$overall[4] * 100,
1), "%]"), cex = 0.75, adj = 0)
text(y = max(infx_train_splsda_final$variates$X[, 2]) * 0.5,
x = min(infx_train_splsda_final$variates$X[, 1]), paste0("Sensitivity = ",
paste0(formatC(round(infx_epi$detail[2][3, ], 3) *
100, digits = 1, format = "f"), "%"), " [", paste0(formatC(round(infx_epi$detail[3][3,
], 3) * 100, digits = 1, format = "f"), "%"),
", ", paste0(formatC(round(infx_epi$detail[4][3,
], 3) * 100, digits = 1, format = "f"), "%"),
"]"), cex = 0.75, adj = 0)
text(y = max(infx_train_splsda_final$variates$X[, 2]) * 0.4,
x = min(infx_train_splsda_final$variates$X[, 1]), paste0("Specificity = ",
paste0(formatC(round(infx_epi$detail[2][4, ], 3) *
100, digits = 1, format = "f"), "%"), " [", paste0(formatC(round(infx_epi$detail[3][4,
], 3) * 100, digits = 1, format = "f"), "%"),
", ", paste0(formatC(round(infx_epi$detail[4][4,
], 3) * 100, digits = 1, format = "f"), "%"),
"]"), cex = 0.75, adj = 0)
text(y = max(infx_train_splsda_final$variates$X[, 2]) * 0.3,
x = min(infx_train_splsda_final$variates$X[, 1]), paste0("Odds Ratio = ",
formatC(round(infx_epi$detail[2][6, ], 3), digits = 1,
format = "f"), " [", formatC(round(infx_epi$detail[3][6,
], 3), digits = 1, format = "f"), ", ", formatC(round(infx_epi$detail[4][6,
], 3), digits = 1, format = "f"), "]"), cex = 0.75,
adj = 0)
invisible(dev.off())
}
par(mfrow = c(1, 2))
# Figure 6A
plotIndiv(diversity_train_splsda_final, xlim = c(min(diversity_train_splsda_final$variates$X[,
1]) * 1.15, max(diversity_train_splsda_final$variates$X[,
1])), comp = c(1, 2), pch = 1, ind.names = FALSE, legend = TRUE,
background = diversity_train_background, col = c("#3A001E",
"#8A0246", "#C20463"), star = TRUE, point.lwd = 0.5,
title = "sPLS-DA:\nDiversity Groups", style = "graphics",
legend.title = "Diversity Group", X.label = paste0("Component 1 (",
round(diversity_train_splsda_final$prop_expl_var$X[1] *
100), "%)"), Y.label = paste0("Component 2 (", round(diversity_train_splsda_final$prop_expl_var$X[2] *
100), "%)"))
addtable2plot(y = -2.5, x = min(diversity_train_splsda_final$variates$X[,
1]) * 1.2, diversity_confusion_df, bty = "o", display.rownames = TRUE,
hlines = TRUE, vlines = TRUE, cex = 0.75)
text(y = -3, x = min(diversity_train_splsda_final$variates$X[,
1]) * 1.15, paste0("Low Div. Accuracy = ", round(diversity_epi$balanced.accuracy[1] *
100, 1), "%"), cex = 0.75, adj = 0)
text(y = -3.2, x = min(diversity_train_splsda_final$variates$X[,
1]) * 1.15, paste0("Low Div. Sensitivity = ", round(diversity_epi$precision[1] *
100, 1), "%"), cex = 0.75, adj = 0)
text(y = -3.4, x = min(diversity_train_splsda_final$variates$X[,
1]) * 1.15, paste0("Low Div. Specificity = ", round(diversity_epi$specificity[1] *
100, 1), "%"), cex = 0.75, adj = 0)
text(y = -3.6, x = min(diversity_train_splsda_final$variates$X[,
1]) * 1.15, paste0("Low Div. Odds Ratio = ", round(diversity_epi$DOR[1],
1)), cex = 0.75, adj = 0)
text(y = -4, x = min(diversity_train_splsda_final$variates$X[,
1]) * 1.15, paste0("Medium Div. Accuracy = ", round(diversity_epi$balanced.accuracy[2] *
100, 1), "%"), cex = 0.75, adj = 0)
text(y = -4.2, x = min(diversity_train_splsda_final$variates$X[,
1]) * 1.15, paste0("Medium Div. Sensitivity = ", round(diversity_epi$precision[2] *
100, 1), "%"), cex = 0.75, adj = 0)
text(y = -4.4, x = min(diversity_train_splsda_final$variates$X[,
1]) * 1.15, paste0("Medium Div. Specificity = ", round(diversity_epi$specificity[2] *
100, 1), "%"), cex = 0.75, adj = 0)
text(y = -4.6, x = min(diversity_train_splsda_final$variates$X[,
1]) * 1.15, paste0("Medium Div. Odds Ratio = ", round(diversity_epi$DOR[2],
1)), cex = 0.75, adj = 0)
text(y = -5, x = min(diversity_train_splsda_final$variates$X[,
1]) * 1.15, paste0("High Div. Accuracy = ", round(diversity_epi$balanced.accuracy[3] *
100, 1), "%"), cex = 0.75, adj = 0)
text(y = -5.2, x = min(diversity_train_splsda_final$variates$X[,
1]) * 1.15, paste0("High Div. Sensitivity = ", round(diversity_epi$precision[3] *
100, 1), "%"), cex = 0.75, adj = 0)
text(y = -5.4, x = min(diversity_train_splsda_final$variates$X[,
1]) * 1.15, paste0("High Div. Specificity = ", round(diversity_epi$specificity[3] *
100, 1), "%"), cex = 0.75, adj = 0)
text(y = -5.6, x = min(diversity_train_splsda_final$variates$X[,
1]) * 1.15, paste0("High Div. Odds Ratio = ", round(diversity_epi$DOR[3],
1)), cex = 0.75, adj = 0)
# Figure 6B
plotIndiv(infx_train_splsda_final, comp = c(1, 2), pch = 1, ind.names = FALSE,
legend = TRUE, background = infx_train_background, col = c("goldenrod",
"gray87"), star = TRUE, point.lwd = 0.5, title = "sPLS-DA:\nAny Infection vs No Infection",
style = "graphics", legend.title = "Infection Group", X.label = paste0("Component 1 (",
round(infx_train_splsda_final$prop_expl_var$X[1] * 100),
"%)"), Y.label = paste0("Component 2 (", round(infx_train_splsda_final$prop_expl_var$X[2] *
100), "%)"))
addtable2plot(y = max(infx_train_splsda_final$variates$X[, 2]) *
0.8, x = min(infx_train_splsda_final$variates$X[, 1]), infx_confusion_df,
bty = "o", display.rownames = TRUE, hlines = TRUE, vlines = TRUE,
cex = 0.75)
text(y = max(infx_train_splsda_final$variates$X[, 2]) * 0.6,
x = min(infx_train_splsda_final$variates$X[, 1]), paste0("Accuracy = ",
round(infx_cm$overall[1] * 100, 1), "% [", round(infx_cm$overall[3] *
100, 1), "%, ", round(infx_cm$overall[4] * 100, 1),
"%]"), cex = 0.75, adj = 0)
text(y = max(infx_train_splsda_final$variates$X[, 2]) * 0.5,
x = min(infx_train_splsda_final$variates$X[, 1]), paste0("Sensitivity = ",
paste0(formatC(round(infx_epi$detail[2][3, ], 3) * 100,
digits = 1, format = "f"), "%"), " [", paste0(formatC(round(infx_epi$detail[3][3,
], 3) * 100, digits = 1, format = "f"), "%"), ", ",
paste0(formatC(round(infx_epi$detail[4][3, ], 3) * 100,
digits = 1, format = "f"), "%"), "]"), cex = 0.75,
adj = 0)
text(y = max(infx_train_splsda_final$variates$X[, 2]) * 0.4,
x = min(infx_train_splsda_final$variates$X[, 1]), paste0("Specificity = ",
paste0(formatC(round(infx_epi$detail[2][4, ], 3) * 100,
digits = 1, format = "f"), "%"), " [", paste0(formatC(round(infx_epi$detail[3][4,
], 3) * 100, digits = 1, format = "f"), "%"), ", ",
paste0(formatC(round(infx_epi$detail[4][4, ], 3) * 100,
digits = 1, format = "f"), "%"), "]"), cex = 0.75,
adj = 0)
text(y = max(infx_train_splsda_final$variates$X[, 2]) * 0.3,
x = min(infx_train_splsda_final$variates$X[, 1]), paste0("Odds Ratio = ",
formatC(round(infx_epi$detail[2][6, ], 3), digits = 1,
format = "f"), " [", formatC(round(infx_epi$detail[3][6,
], 3), digits = 1, format = "f"), ", ", formatC(round(infx_epi$detail[4][6,
], 3), digits = 1, format = "f"), "]"), cex = 0.75,
adj = 0)# Take absolute value to make diversity group loadings plot
# narrower
diversity_train_splsda_final$loadings$X <- abs(diversity_train_splsda_final$loadings$X)
# Combine Supplemental Figures 2A + 2B
pdf(file = "./Results/Supplemental_Figure_2.pdf", height = 8,
width = 10)
par(mfrow = c(1, 3))
# Supplmental Figure 2A
plotLoadings(diversity_train_splsda_final, contrib = "max", method = "mean",
comp = 1, legend = FALSE, legend.col = c(c("#3A001E", "#8A0246",
"#C20463")), size.name = 0.6, size.title = rel(1), ndisplay = 50)
# Supplmental Figure 2B
plotLoadings(diversity_train_splsda_final, contrib = "max", method = "mean",
comp = 2, legend.col = c("#3A001E", "#8A0246", "#C20463"),
size.name = 0.6, size.title = rel(1), ndisplay = 50)
invisible(dev.off())
par(mfrow = c(1, 3))
# Supplmental Figure 2A
plotLoadings(diversity_train_splsda_final, contrib = "max", method = "mean",
comp = 1, legend = FALSE, legend.col = c(c("#3A001E", "#8A0246",
"#C20463")), size.name = 0.6, size.title = rel(1), ndisplay = 50)
# Supplmental Figure 2B
plotLoadings(diversity_train_splsda_final, contrib = "max", method = "mean",
comp = 2, legend.col = c("#3A001E", "#8A0246", "#C20463"),
size.name = 0.6, size.title = rel(1), ndisplay = 50)# Take absolute value to make diversity group loadings plot
# narrower
diversity_healthy_train_splsda_final$loadings$X <- abs(diversity_healthy_train_splsda_final$loadings$X)
# Combine Supplemental Figures 2A + 2B
pdf(file = "./Results/Potential_Supplemental_Figure_2.pdf", height = 8,
width = 10)
par(mfrow = c(1, 3))
# Supplmental Figure 2A
plotLoadings(diversity_healthy_train_splsda_final, contrib = "max",
method = "mean", comp = 1, legend = FALSE, legend.col = c(c("#3A001E",
"#8A0246", "#C20463", "#1A49BE")), size.name = 0.6, size.title = rel(1),
ndisplay = 50)
# Supplmental Figure 2B
plotLoadings(diversity_healthy_train_splsda_final, contrib = "max",
method = "mean", comp = 2, legend.col = c("#3A001E", "#8A0246",
"#C20463", "#1A49BE"), size.name = 0.6, size.title = rel(1),
ndisplay = 50)
invisible(dev.off())
par(mfrow = c(1, 3))
# Supplmental Figure 2A
plotLoadings(diversity_healthy_train_splsda_final, contrib = "max",
method = "mean", comp = 1, legend = FALSE, legend.col = c(c("#3A001E",
"#8A0246", "#C20463", "#1A49BE")), size.name = 0.6, size.title = rel(1),
ndisplay = 50)
# Supplmental Figure 2B
plotLoadings(diversity_healthy_train_splsda_final, contrib = "max",
method = "mean", comp = 2, legend.col = c("#3A001E", "#8A0246",
"#C20463", "#1A49BE"), size.name = 0.6, size.title = rel(1),
ndisplay = 50)# Take absolute value to make diversity group loadings plot
# narrower
infx_train_splsda_final$loadings$X <- abs(infx_train_splsda_final$loadings$X)
# Combine Supplemental Figures 2A + 2B
pdf(file = "./Results/Supplemental_Figure_3.pdf", height = 8,
width = 10)
par(mfrow = c(1, 3))
# Supplmental Figure 3A
plotLoadings(infx_train_splsda_final, contrib = "max", method = "mean",
comp = 1, legend = FALSE, legend.col = c("goldenrod", "gray87"),
size.name = 0.6, size.title = rel(1), ndisplay = 50)
# Supplmental Figure 3B
plotLoadings(infx_train_splsda_final, contrib = "max", method = "mean",
comp = 2, legend.col = c("goldenrod", "gray87"), size.name = 0.6,
size.title = rel(1), ndisplay = 50)
invisible(dev.off())
par(mfrow = c(1, 3))
# Supplmental Figure 3A
plotLoadings(infx_train_splsda_final, contrib = "max", method = "mean",
comp = 1, legend = FALSE, legend.col = c("goldenrod", "gray87"),
size.name = 0.6, size.title = rel(1), ndisplay = 50)
# Supplmental Figure 3B
plotLoadings(infx_train_splsda_final, contrib = "max", method = "mean",
comp = 2, legend.col = c("goldenrod", "gray87"), size.name = 0.6,
size.title = rel(1), ndisplay = 50)flow_chart <- flow_exclusions(incl_counts = c(158, 130, 107,
25), total_label = "Total Patients Enrolled", incl_labels = c("Patients w/ Transplant",
"Patients Included", "Patients w/ Bacterial Infection"),
excl_labels = c("No Transplant", "No Sample In Study Period\nDay -7 to +30",
"Patients w/o Bacterial Infection"), show_count = TRUE)
flow_chartflow_chart %>%
export_svg() %>%
charToRaw() %>%
rsvg_pdf("./Results/Supplemental_Figure_4.pdf")## Vector of variables to summarize
demo_vars <- c("race", "sex", "age", "meld_transplant", "Alcoholic Hepatitis",
"Alcoholic Cirrhosis", "NAFLD/NASH", "Primary Sclerosing Cholangitis",
"Acute Viral Hepatitis", "Chronic Hepatitis B", "Chronic Hepatitis C",
"Autoimmune", "Wilson's Disease", "Alpha-1 Antitrypsin",
"Hemachromatosis", "Drug Induced Liver Injury or Toxin",
"Budd Chiari", "Cryptogenic", "Malignancy", "Other", "Dialysis",
"Pressers", "Mechanical Ventilation")
## Vector of categorical variables that need transformation
demo_cats <- c("race", "sex", "Alcoholic Hepatitis", "Alcoholic Cirrhosis",
"NAFLD/NASH", "Primary Sclerosing Cholangitis", "Acute Viral Hepatitis",
"Chronic Hepatitis B", "Chronic Hepatitis C", "Autoimmune",
"Wilson's Disease", "Alpha-1 Antitrypsin", "Hemachromatosis",
"Drug Induced Liver Injury or Toxin", "Budd Chiari", "Cryptogenic",
"Malignancy", "Other", "Dialysis", "Pressers", "Mechanical Ventilation")
tab1_1 <- CreateTableOne(vars = demo_vars, testNonNormal = "wilcox.test",
includeNA = TRUE, factorVars = demo_cats, strata = "any_infection",
data = demo)
summary(tab1_1) # Age is potentially skewed, need to state that it is skewed and re-run `CreateTableOne`##
## ### Summary of continuous variables ###
##
## any_infection: 0
## n miss p.miss mean sd median p25 p75 min max skew kurt
## age 82 0 0 51 17 55 40 63 2 77 -1.00 0.8
## meld_transplant 82 0 0 26 10 28 19 33 6 49 -0.04 -0.7
## ------------------------------------------------------------
## any_infection: 1
## n miss p.miss mean sd median p25 p75 min max skew kurt
## age 25 0 0 56 15 59 46 68 22 73 -0.9 0.002
## meld_transplant 25 0 0 27 10 30 19 33 11 42 -0.3 -1.057
##
## p-values
## pNormal pNonNormal
## age 0.2072738 0.1765252
## meld_transplant 0.6624494 0.6085202
##
## Standardize mean differences
## 1 vs 2
## age 0.2976093
## meld_transplant 0.1025141
##
## =======================================================================================
##
## ### Summary of categorical variables ###
##
## any_infection: 0
## var n miss p.miss
## race 82 0 0.0
##
##
##
##
##
##
##
## sex 82 0 0.0
##
##
## Alcoholic Hepatitis 82 0 0.0
##
##
## Alcoholic Cirrhosis 82 0 0.0
##
##
## NAFLD/NASH 82 0 0.0
##
##
## Primary Sclerosing Cholangitis 82 0 0.0
##
##
## Acute Viral Hepatitis 82 0 0.0
##
##
## Chronic Hepatitis B 82 0 0.0
##
##
## Chronic Hepatitis C 82 0 0.0
##
##
## Autoimmune 82 0 0.0
##
##
## Wilson's Disease 82 0 0.0
##
##
## Alpha-1 Antitrypsin 82 0 0.0
##
## Hemachromatosis 82 0 0.0
##
##
## Drug Induced Liver Injury or Toxin 82 0 0.0
##
##
## Budd Chiari 82 0 0.0
##
## Cryptogenic 82 0 0.0
##
##
## Malignancy 82 0 0.0
##
##
## Other 82 0 0.0
##
##
## Dialysis 82 0 0.0
##
##
## Pressers 82 0 0.0
##
##
## Mechanical Ventilation 82 0 0.0
##
##
## level freq percent cum.percent
## American Indian or Alaska Native 1 1.2 1.2
## Asian/Mideast Indian 6 7.3 8.5
## Black/African-American 9 11.0 19.5
## More than one Race 8 9.8 29.3
## Patient Declined 4 4.9 34.1
## Unknown 0 0.0 34.1
## White 54 65.9 100.0
##
## Female 38 46.3 46.3
## Male 44 53.7 100.0
##
## 0 76 92.7 92.7
## 1 6 7.3 100.0
##
## 0 42 51.2 51.2
## 1 40 48.8 100.0
##
## 0 72 87.8 87.8
## 1 10 12.2 100.0
##
## 0 76 92.7 92.7
## 1 6 7.3 100.0
##
## 0 78 95.1 95.1
## 1 4 4.9 100.0
##
## 0 82 100.0 100.0
## 1 0 0.0 100.0
##
## 0 78 95.1 95.1
## 1 4 4.9 100.0
##
## 0 77 93.9 93.9
## 1 5 6.1 100.0
##
## 0 80 97.6 97.6
## 1 2 2.4 100.0
##
## 0 82 100.0 100.0
##
## 0 82 100.0 100.0
## 1 0 0.0 100.0
##
## 0 81 98.8 98.8
## 1 1 1.2 100.0
##
## 0 82 100.0 100.0
##
## 0 78 95.1 95.1
## 1 4 4.9 100.0
##
## 0 65 79.3 79.3
## 1 17 20.7 100.0
##
## 0 74 90.2 90.2
## 1 8 9.8 100.0
##
## 0 60 73.2 73.2
## 1 22 26.8 100.0
##
## 0 76 92.7 92.7
## 1 6 7.3 100.0
##
## 0 77 93.9 93.9
## 1 5 6.1 100.0
##
## ------------------------------------------------------------
## any_infection: 1
## var n miss p.miss
## race 25 0 0.0
##
##
##
##
##
##
##
## sex 25 0 0.0
##
##
## Alcoholic Hepatitis 25 0 0.0
##
##
## Alcoholic Cirrhosis 25 0 0.0
##
##
## NAFLD/NASH 25 0 0.0
##
##
## Primary Sclerosing Cholangitis 25 0 0.0
##
##
## Acute Viral Hepatitis 25 0 0.0
##
##
## Chronic Hepatitis B 25 0 0.0
##
##
## Chronic Hepatitis C 25 0 0.0
##
##
## Autoimmune 25 0 0.0
##
##
## Wilson's Disease 25 0 0.0
##
##
## Alpha-1 Antitrypsin 25 0 0.0
##
## Hemachromatosis 25 0 0.0
##
##
## Drug Induced Liver Injury or Toxin 25 0 0.0
##
##
## Budd Chiari 25 0 0.0
##
## Cryptogenic 25 0 0.0
##
##
## Malignancy 25 0 0.0
##
##
## Other 25 0 0.0
##
##
## Dialysis 25 0 0.0
##
##
## Pressers 25 0 0.0
##
##
## Mechanical Ventilation 25 0 0.0
##
##
## level freq percent cum.percent
## American Indian or Alaska Native 0 0.0 0.0
## Asian/Mideast Indian 2 8.0 8.0
## Black/African-American 2 8.0 16.0
## More than one Race 2 8.0 24.0
## Patient Declined 1 4.0 28.0
## Unknown 2 8.0 36.0
## White 16 64.0 100.0
##
## Female 9 36.0 36.0
## Male 16 64.0 100.0
##
## 0 23 92.0 92.0
## 1 2 8.0 100.0
##
## 0 17 68.0 68.0
## 1 8 32.0 100.0
##
## 0 19 76.0 76.0
## 1 6 24.0 100.0
##
## 0 25 100.0 100.0
## 1 0 0.0 100.0
##
## 0 25 100.0 100.0
## 1 0 0.0 100.0
##
## 0 24 96.0 96.0
## 1 1 4.0 100.0
##
## 0 25 100.0 100.0
## 1 0 0.0 100.0
##
## 0 25 100.0 100.0
## 1 0 0.0 100.0
##
## 0 24 96.0 96.0
## 1 1 4.0 100.0
##
## 0 25 100.0 100.0
##
## 0 24 96.0 96.0
## 1 1 4.0 100.0
##
## 0 25 100.0 100.0
## 1 0 0.0 100.0
##
## 0 25 100.0 100.0
##
## 0 24 96.0 96.0
## 1 1 4.0 100.0
##
## 0 19 76.0 76.0
## 1 6 24.0 100.0
##
## 0 20 80.0 80.0
## 1 5 20.0 100.0
##
## 0 16 64.0 64.0
## 1 9 36.0 100.0
##
## 0 20 80.0 80.0
## 1 5 20.0 100.0
##
## 0 23 92.0 92.0
## 1 2 8.0 100.0
##
##
## p-values
## pApprox pExact
## race 0.3074905 0.4275466
## sex 0.4953032 0.4904875
## Alcoholic Hepatitis 1.0000000 1.0000000
## Alcoholic Cirrhosis 0.2123469 0.1713637
## NAFLD/NASH 0.2590605 0.1979409
## Primary Sclerosing Cholangitis 0.3704754 0.3322151
## Acute Viral Hepatitis 0.6007080 0.5709809
## Chronic Hepatitis B 0.5271112 0.2336449
## Chronic Hepatitis C 0.6007080 0.5709809
## Autoimmune 0.4694777 0.5886832
## Wilson's Disease 1.0000000 0.5538202
## Alpha-1 Antitrypsin NA NA
## Hemachromatosis 0.5271112 0.2336449
## Drug Induced Liver Injury or Toxin 1.0000000 1.0000000
## Budd Chiari NA NA
## Cryptogenic 1.0000000 1.0000000
## Malignancy 0.9440592 0.7828238
## Other 0.3063991 0.1774710
## Dialysis 0.5266900 0.4513768
## Pressers 0.1465607 0.1238754
## Mechanical Ventilation 1.0000000 0.6642869
##
## Standardize mean differences
## 1 vs 2
## race 0.45891730
## sex 0.21130091
## Alcoholic Hepatitis 0.02568258
## Alcoholic Cirrhosis 0.34709743
## NAFLD/NASH 0.31029007
## Primary Sclerosing Cholangitis 0.39735971
## Acute Viral Hepatitis 0.32025631
## Chronic Hepatitis B 0.28867513
## Chronic Hepatitis C 0.32025631
## Autoimmune 0.36037499
## Wilson's Disease 0.08851811
## Alpha-1 Antitrypsin 0.00000000
## Hemachromatosis 0.28867513
## Drug Induced Liver Injury or Toxin 0.15713484
## Budd Chiari 0.00000000
## Cryptogenic 0.04264158
## Malignancy 0.07849392
## Other 0.29088216
## Dialysis 0.19854168
## Pressers 0.37578690
## Mechanical Ventilation 0.07437488
tableone_skewed <- c("age", "meld_transplant")
tab1_2 <- print(tab1_1, nonnormal = tableone_skewed, formatOptions = list(big.mark = ","))## Stratified by any_infection
## 0
## n 82
## race (%)
## American Indian or Alaska Native 1 ( 1.2)
## Asian/Mideast Indian 6 ( 7.3)
## Black/African-American 9 ( 11.0)
## More than one Race 8 ( 9.8)
## Patient Declined 4 ( 4.9)
## Unknown 0 ( 0.0)
## White 54 ( 65.9)
## sex = Male (%) 44 ( 53.7)
## age (median [IQR]) 55.00 [39.50, 63.00]
## meld_transplant (median [IQR]) 28.00 [19.00, 33.00]
## Alcoholic Hepatitis = 1 (%) 6 ( 7.3)
## Alcoholic Cirrhosis = 1 (%) 40 ( 48.8)
## NAFLD/NASH = 1 (%) 10 ( 12.2)
## Primary Sclerosing Cholangitis = 1 (%) 6 ( 7.3)
## Acute Viral Hepatitis = 1 (%) 4 ( 4.9)
## Chronic Hepatitis B = 1 (%) 0 ( 0.0)
## Chronic Hepatitis C = 1 (%) 4 ( 4.9)
## Autoimmune = 1 (%) 5 ( 6.1)
## Wilson's Disease = 1 (%) 2 ( 2.4)
## Alpha-1 Antitrypsin = 0 (%) 82 (100.0)
## Hemachromatosis = 1 (%) 0 ( 0.0)
## Drug Induced Liver Injury or Toxin = 1 (%) 1 ( 1.2)
## Budd Chiari = 0 (%) 82 (100.0)
## Cryptogenic = 1 (%) 4 ( 4.9)
## Malignancy = 1 (%) 17 ( 20.7)
## Other = 1 (%) 8 ( 9.8)
## Dialysis = 1 (%) 22 ( 26.8)
## Pressers = 1 (%) 6 ( 7.3)
## Mechanical Ventilation = 1 (%) 5 ( 6.1)
## Stratified by any_infection
## 1 p
## n 25
## race (%) 0.307
## American Indian or Alaska Native 0 ( 0.0)
## Asian/Mideast Indian 2 ( 8.0)
## Black/African-American 2 ( 8.0)
## More than one Race 2 ( 8.0)
## Patient Declined 1 ( 4.0)
## Unknown 2 ( 8.0)
## White 16 ( 64.0)
## sex = Male (%) 16 ( 64.0) 0.495
## age (median [IQR]) 59.00 [46.00, 68.00] 0.177
## meld_transplant (median [IQR]) 30.00 [19.00, 33.00] 0.609
## Alcoholic Hepatitis = 1 (%) 2 ( 8.0) 1.000
## Alcoholic Cirrhosis = 1 (%) 8 ( 32.0) 0.212
## NAFLD/NASH = 1 (%) 6 ( 24.0) 0.259
## Primary Sclerosing Cholangitis = 1 (%) 0 ( 0.0) 0.370
## Acute Viral Hepatitis = 1 (%) 0 ( 0.0) 0.601
## Chronic Hepatitis B = 1 (%) 1 ( 4.0) 0.527
## Chronic Hepatitis C = 1 (%) 0 ( 0.0) 0.601
## Autoimmune = 1 (%) 0 ( 0.0) 0.469
## Wilson's Disease = 1 (%) 1 ( 4.0) 1.000
## Alpha-1 Antitrypsin = 0 (%) 25 (100.0) NA
## Hemachromatosis = 1 (%) 1 ( 4.0) 0.527
## Drug Induced Liver Injury or Toxin = 1 (%) 0 ( 0.0) 1.000
## Budd Chiari = 0 (%) 25 (100.0) NA
## Cryptogenic = 1 (%) 1 ( 4.0) 1.000
## Malignancy = 1 (%) 6 ( 24.0) 0.944
## Other = 1 (%) 5 ( 20.0) 0.306
## Dialysis = 1 (%) 9 ( 36.0) 0.527
## Pressers = 1 (%) 5 ( 20.0) 0.147
## Mechanical Ventilation = 1 (%) 2 ( 8.0) 1.000
## Stratified by any_infection
## test
## n
## race (%)
## American Indian or Alaska Native
## Asian/Mideast Indian
## Black/African-American
## More than one Race
## Patient Declined
## Unknown
## White
## sex = Male (%)
## age (median [IQR]) nonnorm
## meld_transplant (median [IQR]) nonnorm
## Alcoholic Hepatitis = 1 (%)
## Alcoholic Cirrhosis = 1 (%)
## NAFLD/NASH = 1 (%)
## Primary Sclerosing Cholangitis = 1 (%)
## Acute Viral Hepatitis = 1 (%)
## Chronic Hepatitis B = 1 (%)
## Chronic Hepatitis C = 1 (%)
## Autoimmune = 1 (%)
## Wilson's Disease = 1 (%)
## Alpha-1 Antitrypsin = 0 (%)
## Hemachromatosis = 1 (%)
## Drug Induced Liver Injury or Toxin = 1 (%)
## Budd Chiari = 0 (%)
## Cryptogenic = 1 (%)
## Malignancy = 1 (%)
## Other = 1 (%)
## Dialysis = 1 (%)
## Pressers = 1 (%)
## Mechanical Ventilation = 1 (%)
write.csv(tab1_2, "./Results/Demo_Table_1.csv", row.names = TRUE) # Saving then reading in the same data allows for an easy way to adjust p-values, since it loads the object as a dataframe
# Need to adjust pvalues and arrange properly....hence the
# multiple dataframes below
tab1_2_padjust1 <- read.csv("./Results/Demo_Table_1.csv") %>%
dplyr::rename(` ` = X, `No Infection` = X0, `Bacterial Infection` = X1)
tab1_2_padjust2 <- tab1_2_padjust1 %>%
mutate(` ` = factor(` `, levels = tab1_2_padjust1$` `))
tab1_2_padjust3 <- tab1_2_padjust1 %>%
mutate(test = ifelse(!is.na(p) & test == "", "chi.sq", test)) %>%
group_by(test) %>%
rstatix::adjust_pvalue(p.col = "p", method = "BH") %>%
ungroup() %>%
mutate(` ` = factor(` `, tab1_2_padjust2$` `)) %>%
arrange(` `) %>%
mutate(p = ifelse(is.na(p), "", p), p.adj = ifelse(is.na(p.adj),
"", p.adj))
# Read in csv to then append adjusted pvalues
write.csv(tab1_2_padjust3, "./Results/Demo_Table_1_padjust.csv",
row.names = FALSE)# Percentage of infections # where there is an infection
# within 30 days after transplant and not 0 days before
cult_percent_infx_all <- peri_criteria_all %>%
filter(bact_infection_present == "Yes", between(eday, 0,
30)) %>%
group_by(patientID, sampleID, eday) %>%
arrange(-infx_stool) %>%
dplyr::slice(1) %>%
ungroup() %>%
select(patientID, sampleID, bact_infection_present, infx_stool,
organism1, micro1.factor) %>%
distinct() %>%
mutate(organism1 = gsub(x = organism1, pattern = "\\s+",
replacement = ""), organism1 = str_to_lower(string = organism1),
organism1 = ifelse(grepl(x = organism1, pattern = "enterococcus|enterobacterales|klebsiella|escherichia|citrobacter|proteus|staphyl|clostrid|pseudo|steno|bacteroides|helico"),
organism1, "Culture Negative")) %>%
group_by(patientID, sampleID, infx_stool) %>%
mutate(`Enterococcus faecium` = grepl(x = organism1, pattern = "enterococcusfaecium",
ignore.case = T), `Enterococcus faecalis` = grepl(x = organism1,
pattern = "enterococcusfaecalis", ignore.case = T), `Enterococcus avium` = grepl(x = organism1,
pattern = "enterococcusavium", ignore.case = T), `Enterococcus gallinarum` = grepl(x = organism1,
pattern = "enterococcusgallinarum", ignore.case = T),
`Klebsiella pneumoniae` = grepl(x = organism1, pattern = "klebsiellapneumoniae",
ignore.case = T), `Enterobacter cloaceae` = grepl(x = organism1,
pattern = "enterobactercloaceae", ignore.case = T),
`Escherichia coli` = grepl(x = organism1, pattern = "escherichiacoli",
ignore.case = T), `Citrobacter freundii` = grepl(x = organism1,
pattern = "citrobacterfreundii", ignore.case = T),
`Proteus mirabilis` = grepl(x = organism1, pattern = "proteusmirabilis",
ignore.case = T), `Staphylococcus aureus` = grepl(x = organism1,
pattern = "staphylococcusaureus", ignore.case = T),
`Staphylococcus epidermis` = grepl(x = organism1, pattern = "staphylococcusepidermidis",
ignore.case = T), `Pseudomonas aeruginosa` = grepl(x = organism1,
pattern = "pseudomonasaeruginosa", ignore.case = T),
`Stenotrophmonas maltophilia` = grepl(x = organism1,
pattern = "stenotrophmonasmaltophilia", ignore.case = T),
`Helicobacter pylori` = grepl(x = organism1, pattern = "helicobacterpylori",
ignore.case = T), `Clostridium difficile` = grepl(x = organism1,
pattern = "clostridiumdifficile|clostridioidesdifficile",
ignore.case = T), `Bacteroides sp.` = grepl(x = organism1,
pattern = "bacteroides", ignore.case = T), `Culture Negative` = grepl(x = organism1,
pattern = "Culture Negative", ignore.case = T)) %>%
pivot_longer(-c(patientID:micro1.factor), names_to = "organisms",
values_to = "org_presence") %>%
mutate(organisms = ifelse(bact_infection_present == "No",
"No Bacterial Infection", organisms), org_presence = ifelse(org_presence ==
TRUE, 1, 0)) %>%
group_by(sampleID, infx_stool, bact_infection_present, organisms) %>%
dplyr::slice_max(org_presence) %>%
ungroup() %>%
filter(organisms != "No Bacterial Infection") %>%
group_by(patientID, bact_infection_present, organisms) %>%
dplyr::slice_max(org_presence) %>%
ungroup() %>%
filter(org_presence == 1) %>%
group_by(patientID, infx_stool, organisms) %>%
dplyr::slice(1) %>%
group_by(organisms) %>%
tally() %>%
ungroup() %>%
mutate(total_infections = sum(n)) %>%
replace_na(list(group = "unknown")) %>%
group_by(organisms) %>%
dplyr::summarize(percent = sum(n)/total_infections * 100,
count = sum(n))
# Location of infections # where there is an infection
# within 30 days after transplant and not 0 days before
cult_location_infx_all <- peri_criteria_all %>%
filter(bact_infection_present == "Yes", between(eday, 0,
30)) %>%
group_by(patientID, eday, key) %>%
arrange(-infx_stool) %>%
dplyr::slice(1) %>%
ungroup() %>%
select(patientID, bact_infection_present, infx_stool, organism1,
key) %>%
distinct() %>%
mutate(organism1 = gsub(x = organism1, pattern = "\\s+",
replacement = ""), organism1 = str_to_lower(string = organism1),
organism1 = ifelse(grepl(x = organism1, pattern = "enterococcus|enterobacterales|klebsiella|escherichia|citrobacter|proteus|staphyl|clostrid|pseudo|steno|bacteroides|helico"),
organism1, "Culture Negative")) %>%
group_by(patientID, infx_stool) %>%
mutate(`Enterococcus faecium` = grepl(x = organism1, pattern = "enterococcusfaecium",
ignore.case = T), `Enterococcus faecalis` = grepl(x = organism1,
pattern = "enterococcusfaecalis", ignore.case = T), `Enterococcus avium` = grepl(x = organism1,
pattern = "enterococcusavium", ignore.case = T), `Enterococcus gallinarum` = grepl(x = organism1,
pattern = "enterococcusgallinarum", ignore.case = T),
`Klebsiella pneumoniae` = grepl(x = organism1, pattern = "klebsiellapneumoniae",
ignore.case = T), `Enterobacter cloaceae` = grepl(x = organism1,
pattern = "enterobactercloaceae", ignore.case = T),
`Escherichia coli` = grepl(x = organism1, pattern = "escherichiacoli",
ignore.case = T), `Citrobacter freundii` = grepl(x = organism1,
pattern = "citrobacterfreundii", ignore.case = T),
`Proteus mirabilis` = grepl(x = organism1, pattern = "proteusmirabilis",
ignore.case = T), `Staphylococcus aureus` = grepl(x = organism1,
pattern = "staphylococcusaureus", ignore.case = T),
`Staphylococcus epidermis` = grepl(x = organism1, pattern = "staphylococcusepidermidis",
ignore.case = T), `Pseudomonas aeruginosa` = grepl(x = organism1,
pattern = "pseudomonasaeruginosa", ignore.case = T),
`Stenotrophmonas maltophilia` = grepl(x = organism1,
pattern = "stenotrophmonasmaltophilia", ignore.case = T),
`Helicobacter pylori` = grepl(x = organism1, pattern = "helicobacterpylori",
ignore.case = T), `Clostridium difficile` = grepl(x = organism1,
pattern = "clostridiumdifficile|clostridioidesdifficile",
ignore.case = T), `Bacteroides sp.` = grepl(x = organism1,
pattern = "bacteroides", ignore.case = T), `Culture Negative` = grepl(x = organism1,
pattern = "Culture Negative", ignore.case = T)) %>%
pivot_longer(-c(patientID:key), names_to = "organisms", values_to = "org_presence") %>%
mutate(organisms = ifelse(bact_infection_present == "No",
"No Bacterial Infection", organisms), org_presence = ifelse(org_presence ==
TRUE, 1, 0)) %>%
group_by(patientID, key, infx_stool, bact_infection_present) %>%
dplyr::slice_max(org_presence) %>%
ungroup() %>%
filter(org_presence == 1) %>%
filter(organisms != "No Bacterial Infection") %>%
group_by(patientID, infx_stool, key) %>%
dplyr::slice(1) %>%
group_by(key) %>%
tally() %>%
ungroup() %>%
mutate(total_infections = sum(n)) %>%
replace_na(list(group = "unknown")) %>%
group_by(key) %>%
dplyr::summarize(percent = sum(n)/total_infections * 100,
count = sum(n))
# Type of Infection # where there is an infection within 30
# days after transplant and not 0 days before
cult_type_infx_all <- peri_criteria_all %>%
filter(bact_infection_present == "Yes", grepl(variable, pattern = "anatomy.+"),
between(eday, 0, 30)) %>%
group_by(patientID, eday, diag_cat2) %>%
arrange(-infx_stool) %>%
dplyr::slice(1) %>%
ungroup() %>%
mutate(diag_cat3 = case_when(grepl("abdominal", diag_cat2,
ignore.case = T) ~ "Abdominal", grepl("vir|bronchitis|covid-19|cmv",
diag_cat2, ignore.case = T) ~ "Viral", grepl("bacteremia",
diag_cat2, ignore.case = T) ~ "Bacteremia", grepl("thrush",
diag_cat2, ignore.case = T) ~ "Fungal", grepl("cholangitis|empyema|panniculitis|peritonitis|latent tb",
diag_cat2, ignore.case = T) ~ "Bacterial", grepl("cystitis|pyelonephritis",
diag_cat2, ignore.case = T) ~ "Urinary Tract Infection",
grepl("pneumonia", diag_cat2, ignore.case = T) ~ "Pneumonia",
TRUE ~ diag_cat2), diag_cat3 = str_to_title(diag_cat3)) %>%
ungroup() %>%
select(patientID, bact_infection_present, infx_stool, organism1,
micro1.factor, key, diag_cat3) %>%
distinct() %>%
mutate(organism1 = gsub(x = organism1, pattern = "\\s+",
replacement = ""), organism1 = str_to_lower(string = organism1),
organism1 = ifelse(grepl(x = organism1, pattern = "enterococcus|enterobacterales|klebsiella|escherichia|citrobacter|proteus|staphyl|clostrid|pseudo|steno|bacteroides|helico"),
organism1, "Culture Negative")) %>%
group_by(patientID, infx_stool) %>%
mutate(`Enterococcus faecium` = grepl(x = organism1, pattern = "enterococcusfaecium",
ignore.case = T), `Enterococcus faecalis` = grepl(x = organism1,
pattern = "enterococcusfaecalis", ignore.case = T), `Enterococcus avium` = grepl(x = organism1,
pattern = "enterococcusavium", ignore.case = T), `Enterococcus gallinarum` = grepl(x = organism1,
pattern = "enterococcusgallinarum", ignore.case = T),
`Klebsiella pneumoniae` = grepl(x = organism1, pattern = "klebsiellapneumoniae",
ignore.case = T), `Enterobacter cloaceae` = grepl(x = organism1,
pattern = "enterobactercloaceae", ignore.case = T),
`Escherichia coli` = grepl(x = organism1, pattern = "escherichiacoli",
ignore.case = T), `Citrobacter freundii` = grepl(x = organism1,
pattern = "citrobacterfreundii", ignore.case = T),
`Proteus mirabilis` = grepl(x = organism1, pattern = "proteusmirabilis",
ignore.case = T), `Staphylococcus aureus` = grepl(x = organism1,
pattern = "staphylococcusaureus", ignore.case = T),
`Staphylococcus epidermis` = grepl(x = organism1, pattern = "staphylococcusepidermidis",
ignore.case = T), `Pseudomonas aeruginosa` = grepl(x = organism1,
pattern = "pseudomonasaeruginosa", ignore.case = T),
`Stenotrophmonas maltophilia` = grepl(x = organism1,
pattern = "stenotrophmonasmaltophilia", ignore.case = T),
`Helicobacter pylori` = grepl(x = organism1, pattern = "helicobacterpylori",
ignore.case = T), `Clostridium difficile` = grepl(x = organism1,
pattern = "clostridiumdifficile|clostridioidesdifficile",
ignore.case = T), `Bacteroides sp.` = grepl(x = organism1,
pattern = "bacteroides", ignore.case = T), `Culture Negative` = grepl(x = organism1,
pattern = "Culture Negative", ignore.case = T)) %>%
pivot_longer(-c(patientID:diag_cat3), names_to = "organisms",
values_to = "org_presence") %>%
mutate(organisms = ifelse(bact_infection_present == "No",
"No Bacterial Infection", organisms), org_presence = ifelse(org_presence ==
TRUE, 1, 0)) %>%
group_by(patientID, diag_cat3, infx_stool, bact_infection_present) %>%
dplyr::slice_max(org_presence) %>%
ungroup() %>%
filter(org_presence == 1, organisms != "No Bacterial Infection") %>%
ungroup() %>%
group_by(patientID, infx_stool, diag_cat3) %>%
dplyr::slice(1) %>%
group_by(diag_cat3) %>%
tally() %>%
ungroup() %>%
mutate(total_infections = sum(n)) %>%
group_by(diag_cat3) %>%
dplyr::summarize(percent = sum(n)/total_infections * 100,
count = sum(n))
culture_tot <- bind_rows(cult_percent_infx_all, cult_location_infx_all,
cult_type_infx_all) %>%
mutate(percent = round(percent, 2))
# Read in csv to then append adjusted pvalues
write.csv(culture_tot, "./Results/Culture_Table_1.csv", row.names = FALSE)## Vector of variables to summarize
abx_vars <- c("Basiliximab", "Mycophenolate", "Steroid", "Systemic Vancomycin",
"Tacrolimus", "Cefepime", "Metronidazole", "Piperacillin/Tazobactam",
"Rifaximin", "Ceftriaxone", "Ciprofloxacin", "Gentamicin",
"Tobramicin", "Daptomycin", "Meropenem", "Oral Vancomycin")
abx2 <- abx %>%
filter(grepl(pattern = "basilix|tacro|steroid|mycophenolate|lactulose",
medication_name, ignore.case = T) | grepl(pattern = "GLUCOCORTICOIDS|steroid",
med_pharm_class, ignore.case = T) | grepl(pattern = "steroid",
med_pharm_sub_class, ignore.case = T) | grepl("given",
mar_action, ignore.case = T)) %>%
mutate(Immunosuppressants = case_when(grepl("basilix", medication_name,
ignore.case = T) ~ "Basiliximab", grepl("tacro", medication_name,
ignore.case = T) ~ "Tacrolimus", grepl("one|ide|solu-cortef",
medication_name, ignore.case = T) ~ "Steroid", grepl("mycophenolate",
medication_name, ignore.case = T) ~ "Mycophenolate",
grepl("lactulose", medication_name, ignore.case = T) ~
"Lactulose"), Antibiotics = case_when(grepl("rifaximin",
medication_name, ignore.case = T) ~ "Rifaximin", grepl("lactulose",
medication_name, ignore.case = T) ~ "Lactulose", grepl("ceftriaxone",
medication_name, ignore.case = T) ~ "Ceftriaxone", grepl("piperacillin|tazobactam",
medication_name, ignore.case = T) ~ "Piperacillin/Tazobactam",
grepl("cefepime", medication_name, ignore.case = T) ~
"Cefepime", grepl("meropenem", medication_name, ignore.case = T) ~
"Meropenem", grepl("gentamicin", medication_name,
ignore.case = T) ~ "Gentamicin", grepl("tobramycin",
medication_name, ignore.case = T) ~ "Tobramicin",
grepl("vancomycin.+oral", medication_name, ignore.case = T) ~
"Oral Vancomycin", grepl("vancomycin.+(IV|Intravenous)",
medication_name, ignore.case = T) ~ "Systemic Vancomycin",
grepl("METRONIDAZOLE", medication_name, ignore.case = T) ~
"Metronidazole", grepl("DAPTOMYCIN", medication_name,
ignore.case = T) ~ "Daptomycin", grepl("linezolid",
medication_name, ignore.case = T) ~ "Linezolid",
grepl("fluconazole", medication_name, ignore.case = T) ~
"Fluconazole", grepl("micafungin", medication_name,
ignore.case = T) ~ "Micafungin", grepl("cipro", medication_name,
ignore.case = T) & !grepl("drop", dose_units, ignore.case = T) ~
"Ciprofloxacin"), action = case_when(!is.na(Immunosuppressants) &
between(days_transplant, 0, 30) | !is.na(Immunosuppressants) &
ordering_mode == "Outpatient" ~ "keep", !is.na(Antibiotics) &
between(days_transplant, -7, 1) ~ "keep", TRUE ~ "remove")) %>%
group_by(patientID, Immunosuppressants, Antibiotics) %>%
arrange(days_transplant) %>%
filter(action == "keep") %>%
dplyr::slice(1) %>%
select(patientID, Immunosuppressants, Antibiotics) %>%
left_join(peri_matrix_all %>%
select(patientID, any_infection)) %>%
pivot_longer(!c(patientID, any_infection), names_to = "variable",
values_to = "value") %>%
drop_na(value) %>%
mutate(variable = 1) %>%
pivot_wider(c(patientID, any_infection), names_from = "value",
values_from = "variable", values_fn = min) %>%
replace(is.na(.), 0)
abx_tab1_1 <- CreateTableOne(vars = abx_vars, testNonNormal = "wilcox.test",
includeNA = FALSE, factorVars = abx_vars, strata = "any_infection",
data = abx2)
summary(abx_tab1_1)##
## ### Summary of categorical variables ###
##
## any_infection: 0
## var n miss p.miss level freq percent cum.percent
## Basiliximab 82 0 0.0 0 28 34.1 34.1
## 1 54 65.9 100.0
##
## Mycophenolate 82 0 0.0 0 10 12.2 12.2
## 1 72 87.8 100.0
##
## Steroid 82 0 0.0 1 82 100.0 100.0
##
## Systemic Vancomycin 82 0 0.0 0 40 48.8 48.8
## 1 42 51.2 100.0
##
## Tacrolimus 82 0 0.0 1 82 100.0 100.0
##
## Cefepime 82 0 0.0 0 56 68.3 68.3
## 1 26 31.7 100.0
##
## Metronidazole 82 0 0.0 0 48 58.5 58.5
## 1 34 41.5 100.0
##
## Piperacillin/Tazobactam 82 0 0.0 0 12 14.6 14.6
## 1 70 85.4 100.0
##
## Rifaximin 82 0 0.0 0 45 54.9 54.9
## 1 37 45.1 100.0
##
## Ceftriaxone 82 0 0.0 0 66 80.5 80.5
## 1 16 19.5 100.0
##
## Ciprofloxacin 82 0 0.0 0 67 81.7 81.7
## 1 15 18.3 100.0
##
## Gentamicin 82 0 0.0 0 81 98.8 98.8
## 1 1 1.2 100.0
##
## Tobramicin 82 0 0.0 0 78 95.1 95.1
## 1 4 4.9 100.0
##
## Daptomycin 82 0 0.0 0 81 98.8 98.8
## 1 1 1.2 100.0
##
## Meropenem 82 0 0.0 0 78 95.1 95.1
## 1 4 4.9 100.0
##
## Oral Vancomycin 82 0 0.0 0 81 98.8 98.8
## 1 1 1.2 100.0
##
## ------------------------------------------------------------
## any_infection: 1
## var n miss p.miss level freq percent cum.percent
## Basiliximab 25 0 0.0 0 7 28.0 28.0
## 1 18 72.0 100.0
##
## Mycophenolate 25 0 0.0 0 1 4.0 4.0
## 1 24 96.0 100.0
##
## Steroid 25 0 0.0 1 25 100.0 100.0
##
## Systemic Vancomycin 25 0 0.0 0 8 32.0 32.0
## 1 17 68.0 100.0
##
## Tacrolimus 25 0 0.0 1 25 100.0 100.0
##
## Cefepime 25 0 0.0 0 14 56.0 56.0
## 1 11 44.0 100.0
##
## Metronidazole 25 0 0.0 0 13 52.0 52.0
## 1 12 48.0 100.0
##
## Piperacillin/Tazobactam 25 0 0.0 0 2 8.0 8.0
## 1 23 92.0 100.0
##
## Rifaximin 25 0 0.0 0 13 52.0 52.0
## 1 12 48.0 100.0
##
## Ceftriaxone 25 0 0.0 0 18 72.0 72.0
## 1 7 28.0 100.0
##
## Ciprofloxacin 25 0 0.0 0 21 84.0 84.0
## 1 4 16.0 100.0
##
## Gentamicin 25 0 0.0 0 24 96.0 96.0
## 1 1 4.0 100.0
##
## Tobramicin 25 0 0.0 0 22 88.0 88.0
## 1 3 12.0 100.0
##
## Daptomycin 25 0 0.0 0 22 88.0 88.0
## 1 3 12.0 100.0
##
## Meropenem 25 0 0.0 0 24 96.0 96.0
## 1 1 4.0 100.0
##
## Oral Vancomycin 25 0 0.0 0 24 96.0 96.0
## 1 1 4.0 100.0
##
##
## p-values
## pApprox pExact
## Basiliximab 0.74143512 0.63341955
## Mycophenolate 0.42082759 0.45169519
## Steroid NA NA
## Systemic Vancomycin 0.21234695 0.17136371
## Tacrolimus NA NA
## Cefepime 0.37287647 0.33702201
## Metronidazole 0.72844870 0.64658181
## Piperacillin/Tazobactam 0.60142514 0.51269562
## Rifaximin 0.98119534 0.82233980
## Ceftriaxone 0.53110302 0.40817712
## Ciprofloxacin 1.00000000 1.00000000
## Gentamicin 0.95599591 0.41438900
## Tobramicin 0.42443880 0.35037337
## Daptomycin 0.05938894 0.03899733
## Meropenem 1.00000000 1.00000000
## Oral Vancomycin 0.95599591 0.41438900
##
## Standardize mean differences
## 1 vs 2
## Basiliximab 0.13310348
## Mycophenolate 0.30385759
## Steroid 0.00000000
## Systemic Vancomycin 0.34709743
## Tacrolimus 0.00000000
## Cefepime 0.25550557
## Metronidazole 0.13174842
## Piperacillin/Tazobactam 0.21056770
## Rifaximin 0.05772164
## Ceftriaxone 0.20043582
## Ciprofloxacin 0.06085600
## Gentamicin 0.17507370
## Tobramicin 0.25833951
## Daptomycin 0.44449214
## Meropenem 0.04264158
## Oral Vancomycin 0.17507370
abx_tab1_2 <- print(abx_tab1_1, formatOptions = list(big.mark = ","))## Stratified by any_infection
## 0 1 p test
## n 82 25
## Basiliximab = 1 (%) 54 ( 65.9) 18 ( 72.0) 0.741
## Mycophenolate = 1 (%) 72 ( 87.8) 24 ( 96.0) 0.421
## Steroid = 1 (%) 82 (100.0) 25 (100.0) NA
## Systemic Vancomycin = 1 (%) 42 ( 51.2) 17 ( 68.0) 0.212
## Tacrolimus = 1 (%) 82 (100.0) 25 (100.0) NA
## Cefepime = 1 (%) 26 ( 31.7) 11 ( 44.0) 0.373
## Metronidazole = 1 (%) 34 ( 41.5) 12 ( 48.0) 0.728
## Piperacillin/Tazobactam = 1 (%) 70 ( 85.4) 23 ( 92.0) 0.601
## Rifaximin = 1 (%) 37 ( 45.1) 12 ( 48.0) 0.981
## Ceftriaxone = 1 (%) 16 ( 19.5) 7 ( 28.0) 0.531
## Ciprofloxacin = 1 (%) 15 ( 18.3) 4 ( 16.0) 1.000
## Gentamicin = 1 (%) 1 ( 1.2) 1 ( 4.0) 0.956
## Tobramicin = 1 (%) 4 ( 4.9) 3 ( 12.0) 0.424
## Daptomycin = 1 (%) 1 ( 1.2) 3 ( 12.0) 0.059
## Meropenem = 1 (%) 4 ( 4.9) 1 ( 4.0) 1.000
## Oral Vancomycin = 1 (%) 1 ( 1.2) 1 ( 4.0) 0.956
write.csv(abx_tab1_2, "./Results/ABX_Table_1.csv", row.names = TRUE) # Saving then reading in the same data allows for an easy way to adjust p-values, since it loads the object as a dataframe
# Need to adjust pvalues and arrange properly....hence the
# multiple dataframes below
abx_tab1_2_padjust1 <- read.csv("./Results/ABX_Table_1.csv") %>%
dplyr::rename(` ` = X, `No Infection` = X0, `Bacterial Infection` = X1)
abx_tab1_2_padjust2 <- abx_tab1_2_padjust1 %>%
mutate(` ` = factor(` `, levels = abx_tab1_2_padjust1$` `))
abx_tab1_2_padjust3 <- abx_tab1_2_padjust1 %>%
mutate(test = ifelse(!is.na(p) & is.na(test), "chi.sq", "")) %>%
group_by(test) %>%
rstatix::adjust_pvalue(p.col = "p", method = "BH") %>%
ungroup() %>%
mutate(` ` = factor(` `, abx_tab1_2_padjust2$` `)) %>%
arrange(` `) %>%
mutate(p = ifelse(is.na(p), "", p), p.adj = ifelse(is.na(p.adj),
"", p.adj))
# Read in csv to then append adjusted pvalues
write.csv(abx_tab1_2_padjust3, "./Results/ABX_Table_1_padjust.csv",
row.names = FALSE)# Expansions and Infections Stats
expan_infx_stats <-
peri_matrix_all %>%
left_join(peri_matrix_all %>% select(sampleID, ends_with("infection")) %>% ungroup()) %>%
mutate(ecoc_infx = enterococcus_infection,
ecoc_infx = ifelse(ecoc_infx >= 1, 1, 0),
ebac_infx = enterobacterales_infection,
ebac_infx = ifelse(ebac_infx >= 1, 1, 0)) %>%
select(enterococcus_rel_abundance, enterobacterales_rel_abundance, ecoc_infx, ebac_infx) %>%
summarise(ecoc_expan_above_20_pct = sum(enterococcus_rel_abundance >= 0.2),
ecoc_expan_above_cutpoint = sum(enterococcus_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[2]),
ecoc_infx_ecoc_below_cutpoint = sum(ecoc_infx == 1 & enterococcus_rel_abundance < optimal_cutpoint_rel$optimal_cutpoint[2], na.rm=T),
ecoc_infx_ecoc_above_cutpoint = sum(ecoc_infx == 1 & enterococcus_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[2], na.rm=T),
ebac_expan_above_5_pct = sum(enterobacterales_rel_abundance >= 0.05),
ebac_expan_above_cutpoint = sum(enterobacterales_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[1]),
ebac_infx_ebac_below_cutpoint = sum(ebac_infx == 1 & enterobacterales_rel_abundance < optimal_cutpoint_rel$optimal_cutpoint[1], na.rm=T),
ebac_infx_ebac_above_cutpoint = sum(ebac_infx == 1 & enterobacterales_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[1], na.rm=T)) %>%
rownames_to_column(var = "rowname") %>%
pivot_longer(-rowname, names_to = "metric", values_to = "count") %>%
mutate(percent = round((count / 107) * 100, 2)) %>%
select(-rowname)
expan_infx_stats## # A tibble: 8 × 3
## metric count percent
## <chr> <int> <dbl>
## 1 ecoc_expan_above_20_pct 43 40.2
## 2 ecoc_expan_above_cutpoint 44 41.1
## 3 ecoc_infx_ecoc_below_cutpoint 2 1.87
## 4 ecoc_infx_ecoc_above_cutpoint 15 14.0
## 5 ebac_expan_above_5_pct 18 16.8
## 6 ebac_expan_above_cutpoint 29 27.1
## 7 ebac_infx_ebac_below_cutpoint 0 0
## 8 ebac_infx_ebac_above_cutpoint 8 7.48
write.csv(expan_infx_stats, "./Results/Appendix_Table_1.csv", row.names = FALSE)save.image(file = "./Data/LT_Modeling.RData")